use std::{
cmp::{max, min},
num::{NonZeroU8, NonZeroUsize},
ptr::slice_from_raw_parts_mut,
};
use anyhow::Result;
use bitflags::bitflags;
use semisafe::slice::get as semisafe_get;
use semisafe::slice::get_mut as semisafe_get_mut;
use crate::{
dct::DctHelper,
fake::group_of_planes::FakeGroupOfPlanes,
frame::FramePlanes,
mv::{CheckMVFlags, MotionVector},
mv_frame::MVFrame,
params::{DctMode, DivideMode, MVPlaneSet, MotionFlags, PenaltyScaling, SearchType, Subpel},
util::{BlockFunctions, Pixel, median, select_block_functions},
};
/// Number of slots in the `predictors` array of `PlaneOfBlocks`.
const MAX_PREDICTOR: usize = 5;
/// Block-matching state for one plane of blocks: the block grid geometry, scratch
/// buffers, the parameters of the search currently running and the working
/// variables of the block currently being processed.
// NOTE(review): the reason for `allow(dead_code)` is not visible in this file —
// presumably some fields are not read yet; confirm before removing.
#[allow(dead_code)]
pub(crate) struct PlaneOfBlocks<T: Pixel> {
/// Sub-pixel precision setting; its numeric value is obtained with `u8::from`.
pub pel: Subpel,
/// Base-2 logarithm of the numeric value of `pel`.
pub log_pel: u8,
/// The `level` passed to `new`.
pub log_scale: usize,
/// `2^log_scale`.
pub scale: usize,
/// Block width.
pub blk_size_x: NonZeroUsize,
/// Block height.
pub blk_size_y: NonZeroUsize,
/// Horizontal overlap of neighbouring blocks; the horizontal block step is
/// `blk_size_x - overlap_x`.
pub overlap_x: usize,
/// Vertical overlap of neighbouring blocks; the vertical block step is
/// `blk_size_y - overlap_y`.
pub overlap_y: usize,
/// Number of blocks in one row of the grid.
pub blk_x: NonZeroUsize,
/// Number of block rows in the grid.
pub blk_y: NonZeroUsize,
/// Total number of blocks (`blk_x * blk_y`, saturating).
pub blk_count: NonZeroUsize,
/// Divisor applied to the luma block width to get the chroma block width.
pub x_ratio_uv: NonZeroU8,
/// Divisor applied to the luma block height to get the chroma block height.
pub y_ratio_uv: NonZeroU8,
/// Base-2 logarithm of `x_ratio_uv`.
pub log_x_ratio_uv: u8,
/// Base-2 logarithm of `y_ratio_uv`.
pub log_y_ratio_uv: u8,
/// Bit depth of one sample; `new` debug-asserts that it fits the pixel type `T`.
pub bits_per_sample: NonZeroU8,
/// Set from `MotionFlags::SMALLEST_PLANE`; when true, `search_mvs` also
/// accumulates the luma change between reference and source blocks.
pub smallest_plane: bool,
/// Set from `MotionFlags::USE_CHROMA_MOTION`; enables the chroma plane handling.
pub chroma: bool,
/// False only for 16x2 blocks.
pub can_use_satd: bool,
/// Global motion predictor, rewritten at the start of every search/recalculation.
pub global_mv_predictor: MotionVector,
/// One motion vector per block, indexed row-major (`blk_y_i * blk_x + blk_x_i`).
pub vectors: Vec<MotionVector>,
/// Row pitch of `dct_src` / `dct_ref`; initialised to `blk_size_x`.
pub dct_pitch: NonZeroUsize,
/// Length of `freq_array` (`8192 * pel * 2`).
pub freq_size: NonZeroUsize,
/// Histogram scratch buffer used by `find_most_frequent`.
pub freq_array: Vec<i32>,
/// `blk_size_x * blk_size_y * 2^bits_per_sample` (saturating).
pub very_big_sad: NonZeroUsize,
// Block primitives chosen by `select_block_functions` for the luma and chroma block sizes.
block_fns: BlockFunctions,
// DCT helper; created by `search_mvs` for DCT modes 1..=4 or handed in by `recalculate_mvs`.
dct: Option<DctHelper>,
// Scratch blocks of `blk_size_y * dct_pitch` samples each.
dct_src: Box<[T]>,
dct_ref: Box<[T]>,
// Pitches and storage of the per-plane temporary source blocks (luma, U, V).
src_pitch_temp: [NonZeroUsize; 3],
src_temp: [Box<[T]>; 3],
// Parameters copied from the caller at the start of a search/recalculation.
dct_mode: Option<DctMode>,
dct_weight_16: u32,
bad_sad: u64,
bad_range: i32,
// Zero vector with the field shift applied to its `y` component.
zero_mv_field_shifted: MotionVector,
// Position of the current block in each plane (luma, U, V); starts at the plane's padding.
x: [i32; 3],
y: [i32; 3],
// Strides of the source and reference planes (luma, U, V).
src_pitch: [NonZeroUsize; 3],
ref_pitch: [NonZeroUsize; 3],
// More parameters copied from the caller at the start of a search/recalculation.
search_type: SearchType,
search_param: i32,
penalty_zero: u16,
penalty_global: u16,
penalty_new: u16,
bad_count: usize,
try_many: bool,
// +1 when the current row is scanned left to right, -1 when scanned right to left.
blk_scan_dir: i8,
lambda: u32,
lambda_sad: u32,
// Inclusive-looking motion vector limits for the current block, in sub-pixel units.
// NOTE(review): whether the limits are inclusive depends on `clip_mv`, which is not shown here.
dx_max: i32,
dy_max: i32,
dx_min: i32,
dy_min: i32,
// Predictor(s) and best vector found so far for the current block.
predictor: MotionVector,
predictors: [MotionVector; MAX_PREDICTOR],
best_mv: MotionVector,
// Offset of the current block inside each source plane.
src_offset: [usize; 3],
src_luma: u64,
ref_luma: u64,
// Sum over all blocks of (reference block luma sum - source block luma sum).
sum_luma_change: i64,
min_cost: i64,
// Column, row and linear index of the current block.
blk_x_i: usize,
blk_y_i: usize,
blk_idx: usize,
}
impl<T: Pixel> PlaneOfBlocks<T> {
/// Creates the block-matching state for one hierarchy level.
///
/// `blk_x` / `blk_y` give the grid dimensions in blocks, `blk_size_x` / `blk_size_y`
/// the block dimensions, and `level` the hierarchy level (`scale` becomes `2^level`).
/// All per-search working fields start out at their default values; they are filled
/// in by `search_mvs` / `recalculate_mvs`.
///
/// # Panics
///
/// Panics if the numeric value of `pel` is zero, or if a block dimension is smaller
/// than the corresponding chroma ratio (which would give a zero-sized chroma block).
/// These cases were previously undefined behaviour through `new_unchecked`.
#[must_use]
pub fn new(
    blk_x: NonZeroUsize,
    blk_y: NonZeroUsize,
    blk_size_x: NonZeroUsize,
    blk_size_y: NonZeroUsize,
    pel: Subpel,
    level: usize,
    motion_flags: MotionFlags,
    overlap_x: usize,
    overlap_y: usize,
    x_ratio_uv: NonZeroU8,
    y_ratio_uv: NonZeroU8,
    bits_per_sample: NonZeroU8,
) -> Self {
    // The bit depth must need the full width of `T` but not exceed it.
    debug_assert!(
        bits_per_sample.get() as usize > (size_of::<T>() - 1) * 8
            && (bits_per_sample.get() as usize <= size_of::<T>() * 8)
    );

    let blk_count = blk_x.saturating_mul(blk_y);
    let freq_size = NonZeroUsize::new(8192 * usize::from(u8::from(pel)) * 2)
        .expect("sub-pixel factor must be non-zero");
    let dct_pitch = blk_size_x;

    // Chroma block dimensions; the chroma temp buffers use the chroma width as pitch.
    let chroma_blk_x = NonZeroUsize::new(blk_size_x.get() / usize::from(x_ratio_uv.get()))
        .expect("block width must not be smaller than the horizontal chroma ratio");
    let chroma_blk_y = NonZeroUsize::new(blk_size_y.get() / usize::from(y_ratio_uv.get()))
        .expect("block height must not be smaller than the vertical chroma ratio");
    let src_pitch_temp = [blk_size_x, chroma_blk_x, chroma_blk_x];

    let block_fns =
        select_block_functions::<T>(blk_size_x, blk_size_y, chroma_blk_x, chroma_blk_y);

    let sample_range = NonZeroUsize::new(1usize << bits_per_sample.get())
        .expect("a shifted one bit is never zero");
    let luma_len = blk_size_y.get() * src_pitch_temp[0].get();
    let chroma_len = chroma_blk_y.get() * chroma_blk_x.get();
    let dct_len = blk_size_y.get() * dct_pitch.get();

    Self {
        pel,
        log_pel: u8::from(pel).ilog2() as u8,
        log_scale: level,
        scale: 2usize.pow(level as u32),
        blk_size_x,
        blk_size_y,
        overlap_x,
        overlap_y,
        blk_x,
        blk_y,
        blk_count,
        x_ratio_uv,
        y_ratio_uv,
        log_x_ratio_uv: x_ratio_uv.ilog2() as u8,
        log_y_ratio_uv: y_ratio_uv.ilog2() as u8,
        bits_per_sample,
        smallest_plane: motion_flags.contains(MotionFlags::SMALLEST_PLANE),
        chroma: motion_flags.contains(MotionFlags::USE_CHROMA_MOTION),
        // SATD is ruled out only for 16x2 blocks.
        can_use_satd: !(blk_size_x.get() == 16 && blk_size_y.get() == 2),
        global_mv_predictor: MotionVector::zero(),
        vectors: vec![MotionVector::zero(); blk_count.get()],
        dct_pitch,
        freq_size,
        freq_array: vec![0; freq_size.get()],
        very_big_sad: blk_size_x
            .saturating_mul(blk_size_y)
            .saturating_mul(sample_range),
        block_fns,
        dct: None,
        dct_src: vec![T::zero(); dct_len].into_boxed_slice(),
        dct_ref: vec![T::zero(); dct_len].into_boxed_slice(),
        src_pitch_temp,
        src_temp: [
            vec![T::zero(); luma_len].into_boxed_slice(),
            vec![T::zero(); chroma_len].into_boxed_slice(),
            vec![T::zero(); chroma_len].into_boxed_slice(),
        ],
        dct_mode: Default::default(),
        dct_weight_16: Default::default(),
        bad_sad: Default::default(),
        bad_range: Default::default(),
        zero_mv_field_shifted: Default::default(),
        x: Default::default(),
        y: Default::default(),
        // Placeholder pitches; the real plane strides are written at the start of a search.
        src_pitch: src_pitch_temp,
        ref_pitch: src_pitch_temp,
        search_type: SearchType::Hex2,
        search_param: Default::default(),
        penalty_zero: Default::default(),
        penalty_global: Default::default(),
        penalty_new: Default::default(),
        bad_count: Default::default(),
        try_many: Default::default(),
        blk_scan_dir: Default::default(),
        lambda: Default::default(),
        lambda_sad: Default::default(),
        dx_max: Default::default(),
        dy_max: Default::default(),
        dx_min: Default::default(),
        dy_min: Default::default(),
        predictor: Default::default(),
        predictors: Default::default(),
        best_mv: Default::default(),
        src_offset: Default::default(),
        src_luma: Default::default(),
        ref_luma: Default::default(),
        sum_luma_change: Default::default(),
        min_cost: Default::default(),
        blk_x_i: Default::default(),
        blk_y_i: Default::default(),
        blk_idx: Default::default(),
    }
}
/// Public entry point of the motion search for this plane of blocks.
///
/// Packs every argument except `dct_mode` into a `SearchMvsArgs` and forwards it to
/// the `search_mvs_internal` instantiation whose const parameters equal the numeric
/// value of `dct_mode` (0..=10) and `self.log_pel` (0..=2).
///
/// # Errors
///
/// Returns whatever error the selected `search_mvs_internal` instantiation returns.
///
/// # Panics
///
/// Hits `unreachable!` when the numeric DCT mode is above 10 or `self.log_pel` is
/// above 2.
pub fn search_mvs<'a>(
    &mut self,
    out_idx: usize,
    src_frame: &'a MVFrame,
    src_frame_data: &'a FramePlanes<'a, T>,
    ref_frame: &'a MVFrame,
    ref_frame_data: &'a FramePlanes<'a, T>,
    search_type: SearchType,
    search_param: i32,
    lambda: u32,
    lambda_sad: u32,
    penalty_new: u16,
    penalty_level: PenaltyScaling,
    out: &'a mut MvsOutput,
    global_mv: &'a mut MotionVector,
    field_shift: i32,
    dct_mode: DctMode,
    mean_luma_change: &'a mut i32,
    penalty_zero: u16,
    penalty_global: u16,
    bad_sad: u64,
    bad_range: i32,
    meander: bool,
    try_many: bool,
) -> Result<()> {
    let args = SearchMvsArgs {
        out_idx,
        src_frame,
        src_frame_data,
        ref_frame,
        ref_frame_data,
        search_type,
        search_param,
        lambda,
        lambda_sad,
        penalty_new,
        penalty_level,
        out,
        global_mv,
        field_shift,
        mean_luma_change,
        penalty_zero,
        penalty_global,
        bad_sad,
        bad_range,
        meander,
        try_many,
    };

    // Expands to the DCT-mode dispatch for one fixed `LOG_PEL` value.
    macro_rules! dispatch_dct {
        ($this:expr, $dct:expr, $args:expr, $pel:literal) => {
            match $dct {
                0 => $this.search_mvs_internal::<0, $pel>($args),
                1 => $this.search_mvs_internal::<1, $pel>($args),
                2 => $this.search_mvs_internal::<2, $pel>($args),
                3 => $this.search_mvs_internal::<3, $pel>($args),
                4 => $this.search_mvs_internal::<4, $pel>($args),
                5 => $this.search_mvs_internal::<5, $pel>($args),
                6 => $this.search_mvs_internal::<6, $pel>($args),
                7 => $this.search_mvs_internal::<7, $pel>($args),
                8 => $this.search_mvs_internal::<8, $pel>($args),
                9 => $this.search_mvs_internal::<9, $pel>($args),
                10 => $this.search_mvs_internal::<10, $pel>($args),
                _ => unreachable!(),
            }
        };
    }

    let dct_id = u8::from(dct_mode);
    match self.log_pel {
        0 => dispatch_dct!(self, dct_id, args, 0),
        1 => dispatch_dct!(self, dct_id, args, 1),
        2 => dispatch_dct!(self, dct_id, args, 2),
        _ => unreachable!(),
    }
}
/// Motion search over every block of the plane, monomorphised on the numeric DCT
/// mode (`DCT_MODE`) and the base-2 log of the sub-pixel factor (`LOG_PEL`).
///
/// The function copies the search parameters into `self`, writes the size header
/// of this plane's output record into `out.block_data` at `out_idx`, then walks the
/// block grid row by row (optionally meandering), runs `pseudo_epz_search` for each
/// block and stores the resulting `best_mv` into the output record. When
/// `smallest_plane` is set it also accumulates the luma difference between the
/// reference and source blocks and writes the per-block mean to `mean_luma_change`.
///
/// # Errors
///
/// Propagates errors from `DctHelper::new`, from fetching planes out of the frame
/// data and from `pseudo_epz_search`.
#[cfg_attr(
feature = "tracing",
tracing::instrument(
skip_all,
name = "plane_of_blocks::search_mvs_internal",
fields(DCT_MODE, LOG_PEL)
)
)]
fn search_mvs_internal<const DCT_MODE: u8, const LOG_PEL: usize>(
&mut self,
args: SearchMvsArgs<'_, T>,
) -> Result<()> {
let SearchMvsArgs {
out_idx,
src_frame,
src_frame_data,
ref_frame,
ref_frame_data,
search_type,
search_param,
lambda,
lambda_sad,
penalty_new,
penalty_level,
out,
global_mv,
field_shift,
mean_luma_change,
penalty_zero,
penalty_global,
bad_sad,
bad_range,
meander,
try_many,
} = args;
// A DCT helper is only built for DCT modes 1 through 4.
if (1..=4).contains(&DCT_MODE) {
self.dct = Some(DctHelper::new(
self.blk_size_x,
self.blk_size_y,
self.bits_per_sample,
)?);
}
self.dct_mode = Some(DctMode::try_from(DCT_MODE as i64).expect("DCT_MODE must be valid"));
// Mean luma change per pixel of a block, capped at 16.
self.dct_weight_16 = min(
16,
mean_luma_change.unsigned_abs()
/ (self.blk_size_x.get() * self.blk_size_y.get()) as u32,
);
self.bad_sad = bad_sad;
self.bad_range = bad_range;
self.zero_mv_field_shifted = MotionVector {
x: 0,
y: field_shift,
sad: 0,
};
// The incoming global vector is scaled by the sub-pixel factor (1 << LOG_PEL)
// and the field shift is added to its vertical component.
self.global_mv_predictor = MotionVector {
x: (1 << LOG_PEL) * global_mv.x,
y: (1 << LOG_PEL) * global_mv.y + field_shift,
sad: global_mv.sad,
};
// Output record layout: a little-endian i32 byte count (header included)
// followed by one `MotionVector` per block.
let plane_data_size =
(size_of::<i32>() + self.blk_count.get() * size_of::<MotionVector>()) as i32;
semisafe_get_mut(
semisafe_get_mut(&mut out.block_data, out_idx..),
..size_of::<i32>(),
)
.copy_from_slice(&plane_data_size.to_le_bytes());
self.search_type = search_type;
self.search_param = search_param;
self.penalty_zero = penalty_zero;
self.penalty_global = penalty_global;
self.bad_count = 0;
self.try_many = try_many;
self.sum_luma_change = 0;
// NOTE(review): reinterprets everything after the 4-byte header as a slice of
// `MotionVector`. This presumes the resulting pointer is suitably aligned for
// `MotionVector` and that the buffer holds at least `blk_count` entries —
// confirm against how `out.block_data` is allocated and how `out_idx` is chosen.
let blk_data: &mut [MotionVector] = unsafe {
let data = semisafe_get_mut(&mut out.block_data, (out_idx + size_of::<i32>())..);
&mut *slice_from_raw_parts_mut(
data.as_mut_ptr().cast(),
data.len() / size_of::<MotionVector>(),
)
};
// Vertical start position of the first block row in each plane: the plane's
// vertical padding. The chroma rows are keyed on `yuv_mode`, not on `self.chroma`.
self.y[0] = semisafe_get(&src_frame.planes, 0).vpad as i32;
if (src_frame.yuv_mode & MVPlaneSet::UPLANE).bits() > 0 {
self.y[1] = semisafe_get(&src_frame.planes, 1).vpad as i32;
}
if (src_frame.yuv_mode & MVPlaneSet::VPLANE).bits() > 0 {
self.y[2] = semisafe_get(&src_frame.planes, 2).vpad as i32;
}
self.src_pitch[0] = semisafe_get(&src_frame.planes, 0).stride;
if self.chroma {
self.src_pitch[1] = semisafe_get(&src_frame.planes, 1).stride;
self.src_pitch[2] = semisafe_get(&src_frame.planes, 2).stride;
}
self.ref_pitch[0] = semisafe_get(&ref_frame.planes, 0).stride;
if self.chroma {
self.ref_pitch[1] = semisafe_get(&ref_frame.planes, 1).stride;
self.ref_pitch[2] = semisafe_get(&ref_frame.planes, 2).stride;
}
// Lambda is divided by the squared sub-pixel factor, then multiplied by the
// level scale (linear) or its square (quadratic) depending on `penalty_level`.
let mut lambda_level = lambda / (1u32 << LOG_PEL).pow(2);
if penalty_level == PenaltyScaling::Linear {
lambda_level *= self.scale as u32;
} else if penalty_level == PenaltyScaling::Quadratic {
lambda_level *= self.scale.pow(2) as u32;
}
// The reference luma plane is only read below for the luma-change sum, which
// is only computed when `smallest_plane` is set.
let ref_plane_y: &[T] = if self.smallest_plane {
ref_frame_data.plane(0)?
} else {
&[]
};
let src_plane_y = src_frame_data.plane(0)?;
for blk_y in 0..self.blk_y.get() {
self.blk_y_i = blk_y;
// Even rows, or all rows when `meander` is off, are scanned left to right;
// odd rows with `meander` on are scanned right to left.
self.blk_scan_dir = if blk_y % 2 == 0 || !meander { 1 } else { -1 };
let blk_x_start: i32 = if blk_y % 2 == 0 || !meander {
0
} else {
self.blk_x.get() as i32 - 1
};
// Horizontal start position of the row: the left padding for a forward scan,
// or the position of the last block in the row for a backward scan.
if self.blk_scan_dir == 1 {
self.x[0] = semisafe_get(&src_frame.planes, 0).hpad as i32;
if self.chroma {
self.x[1] = semisafe_get(&src_frame.planes, 1).hpad as i32;
self.x[2] = semisafe_get(&src_frame.planes, 2).hpad as i32;
}
} else {
self.x[0] = (semisafe_get(&src_frame.planes, 0).hpad
+ (self.blk_size_x.get() - self.overlap_x) * (self.blk_x.get() - 1))
as i32;
if self.chroma {
self.x[1] = (semisafe_get(&src_frame.planes, 1).hpad
+ (self.blk_size_x.get() - self.overlap_x) / self.x_ratio_uv.get() as usize
* (self.blk_x.get() - 1)) as i32;
self.x[2] = (semisafe_get(&src_frame.planes, 2).hpad
+ (self.blk_size_x.get() - self.overlap_x) / self.x_ratio_uv.get() as usize
* (self.blk_x.get() - 1)) as i32;
}
}
for iblk_x in 0..self.blk_x.get() {
// `iblk_x` counts blocks in scan order; `blk_x` is the actual column.
let blk_x =
(blk_x_start as isize + iblk_x as isize * self.blk_scan_dir as isize) as usize;
self.blk_x_i = blk_x;
self.blk_idx = (self.blk_y_i * self.blk_x.get()) + blk_x;
self.src_offset[0] =
semisafe_get(&src_frame.planes, 0).get_pel_pix_offset(self.x[0], self.y[0]);
if self.chroma {
self.src_offset[1] =
semisafe_get(&src_frame.planes, 1).get_pel_pix_offset(self.x[1], self.y[1]);
self.src_offset[2] =
semisafe_get(&src_frame.planes, 2).get_pel_pix_offset(self.x[2], self.y[2]);
}
// NOTE(review): the source pitches are re-assigned for every block even though
// nothing visible in this function changes them after the setup above —
// presumably `pseudo_epz_search` may overwrite them; confirm.
self.src_pitch[0] = semisafe_get(&src_frame.planes, 0).stride;
if self.chroma {
self.src_pitch[1] = semisafe_get(&src_frame.planes, 1).stride;
self.src_pitch[2] = semisafe_get(&src_frame.planes, 2).stride;
}
// Lambda is forced to zero for the first block row.
self.lambda = if blk_y == 0 { 0 } else { lambda_level };
self.penalty_new = penalty_new;
self.lambda_sad = lambda_sad;
// Motion vector limits for this block, in sub-pixel units. The padding is
// shifted right by the level (`log_scale`) before it enters the limits.
let hpad_scaled = semisafe_get(&src_frame.planes, 0).hpad >> self.log_scale;
let vpad_scaled = semisafe_get(&src_frame.planes, 0).vpad >> self.log_scale;
self.dx_max = (semisafe_get(&src_frame.planes, 0).padded_width.get() as i32
- self.x[0]
- self.blk_size_x.get() as i32
- semisafe_get(&src_frame.planes, 0).hpad as i32
+ hpad_scaled as i32)
<< LOG_PEL;
self.dy_max = (semisafe_get(&src_frame.planes, 0).padded_height.get() as i32
- self.y[0]
- self.blk_size_y.get() as i32
- semisafe_get(&src_frame.planes, 0).vpad as i32
+ vpad_scaled as i32)
<< LOG_PEL;
self.dx_min = -((self.x[0] - semisafe_get(&src_frame.planes, 0).hpad as i32
+ hpad_scaled as i32)
<< LOG_PEL);
self.dy_min = -((self.y[0] - semisafe_get(&src_frame.planes, 0).vpad as i32
+ vpad_scaled as i32)
<< LOG_PEL);
// The stored vector of this block is the starting predictor; slot 4 of the
// predictor list holds the clipped zero vector.
self.predictor = self.clip_mv(*semisafe_get(&self.vectors, self.blk_idx));
self.predictors[4] = self.clip_mv(MotionVector::zero());
self.pseudo_epz_search::<DCT_MODE, LOG_PEL>(
src_frame_data,
ref_frame,
ref_frame_data,
)?;
*semisafe_get_mut(blk_data, self.blk_idx) = self.best_mv;
if self.smallest_plane {
// Luma sum of the co-located (zero displacement) reference block minus the
// luma sum of the source block.
unsafe {
self.sum_luma_change += (self.block_fns.luma_sum)(
self.get_ref_block::<LOG_PEL>(ref_frame, ref_plane_y, 0, 0)
.as_ptr()
.cast(),
self.ref_pitch[0],
) as i64
- (self.block_fns.luma_sum)(
semisafe_get(src_plane_y, self.src_offset[0]..)
.as_ptr()
.cast(),
self.src_pitch[0],
) as i64;
}
}
// Step to the next block in scan direction, except after the last block of the row.
if iblk_x < self.blk_x.get() - 1 {
self.x[0] +=
(self.blk_size_x.get() - self.overlap_x) as i32 * self.blk_scan_dir as i32;
if (src_frame.yuv_mode & MVPlaneSet::UPLANE).bits() > 0 {
self.x[1] += ((self.blk_size_x.get() - self.overlap_x)
>> self.log_x_ratio_uv) as i32
* self.blk_scan_dir as i32;
}
if (src_frame.yuv_mode & MVPlaneSet::VPLANE).bits() > 0 {
self.x[2] += ((self.blk_size_x.get() - self.overlap_x)
>> self.log_x_ratio_uv) as i32
* self.blk_scan_dir as i32;
}
}
}
// Step down to the next block row.
self.y[0] += (self.blk_size_y.get() - self.overlap_y) as i32;
if (src_frame.yuv_mode & MVPlaneSet::UPLANE).bits() > 0 {
self.y[1] +=
((self.blk_size_y.get() - self.overlap_y) >> self.log_y_ratio_uv) as i32;
}
if (src_frame.yuv_mode & MVPlaneSet::VPLANE).bits() > 0 {
self.y[2] +=
((self.blk_size_y.get() - self.overlap_y) >> self.log_y_ratio_uv) as i32;
}
}
// Report the average luma change per block back to the caller.
if self.smallest_plane {
*mean_luma_change = (self.sum_luma_change / self.blk_count.get() as i64) as i32;
}
Ok(())
}
/// Entry point for re-evaluating previously estimated motion vectors on this grid.
///
/// Packs all arguments into a `RecalculateMvsArgs` and forwards it to the
/// `recalculate_mvs_internal` instantiation whose const parameters equal the numeric
/// value of `dct_mode` (0..=10) and `self.log_pel` (0..=2).
///
/// # Errors
///
/// Returns whatever error the selected `recalculate_mvs_internal` instantiation
/// returns.
///
/// # Panics
///
/// Hits `unreachable!` when the numeric DCT mode is above 10 or `self.log_pel` is
/// above 2.
pub(crate) fn recalculate_mvs<'a>(
    &mut self,
    out_idx: usize,
    fake_gop: &FakeGroupOfPlanes,
    src_frame: &'a MVFrame,
    src_frame_data: &'a FramePlanes<'a, T>,
    ref_frame: &'a MVFrame,
    ref_frame_data: &'a FramePlanes<'a, T>,
    search_type: SearchType,
    search_param: i32,
    lambda: u32,
    penalty_new: u16,
    out: &'a mut MvsOutput,
    field_shift: i32,
    th_sad: u64,
    dct_helper: Option<DctHelper>,
    dct_mode: DctMode,
    smooth: bool,
    meander: bool,
) -> Result<()> {
    let args = RecalculateMvsArgs {
        out_idx,
        fake_gop,
        src_frame,
        src_frame_data,
        ref_frame,
        ref_frame_data,
        search_type,
        search_param,
        lambda,
        penalty_new,
        out,
        field_shift,
        th_sad,
        dct_helper,
        dct_mode,
        smooth,
        meander,
    };

    // Expands to the DCT-mode dispatch for one fixed `LOG_PEL` value.
    macro_rules! dispatch_dct {
        ($this:expr, $dct:expr, $args:expr, $pel:literal) => {
            match $dct {
                0 => $this.recalculate_mvs_internal::<0, $pel>($args),
                1 => $this.recalculate_mvs_internal::<1, $pel>($args),
                2 => $this.recalculate_mvs_internal::<2, $pel>($args),
                3 => $this.recalculate_mvs_internal::<3, $pel>($args),
                4 => $this.recalculate_mvs_internal::<4, $pel>($args),
                5 => $this.recalculate_mvs_internal::<5, $pel>($args),
                6 => $this.recalculate_mvs_internal::<6, $pel>($args),
                7 => $this.recalculate_mvs_internal::<7, $pel>($args),
                8 => $this.recalculate_mvs_internal::<8, $pel>($args),
                9 => $this.recalculate_mvs_internal::<9, $pel>($args),
                10 => $this.recalculate_mvs_internal::<10, $pel>($args),
                _ => unreachable!(),
            }
        };
    }

    let dct_id = u8::from(dct_mode);
    match self.log_pel {
        0 => dispatch_dct!(self, dct_id, args, 0),
        1 => dispatch_dct!(self, dct_id, args, 1),
        2 => dispatch_dct!(self, dct_id, args, 2),
        _ => unreachable!(),
    }
}
/// Re-evaluates motion vectors taken from an existing vector field (`fake_gop`,
/// plane 0) on this plane's block grid, monomorphised on the numeric DCT mode
/// (`DCT_MODE`) and the base-2 log of the sub-pixel factor (`LOG_PEL`).
///
/// For every block the old field is sampled at the block centre (nearest old block,
/// or a bilinear blend of four old blocks when `smooth` is set), rescaled to the
/// current sub-pixel precision and clipped. The SAD at that predictor is computed;
/// only when it exceeds `th_sad` is `refine` run. The result is stored both in
/// `self.vectors` and in the output record inside `out.block_data`.
///
/// # Errors
///
/// Propagates errors from fetching planes out of the frame data, from the DCT
/// helper's `bytes_2d` and from `refine`.
///
/// # Panics
///
/// Panics if `DCT_MODE` is in `1..=4` and `dct_helper` was `None`.
#[cfg_attr(
feature = "tracing",
tracing::instrument(
skip_all,
name = "plane_of_blocks::recalculate_mvs_internal",
fields(DCT_MODE, LOG_PEL)
)
)]
fn recalculate_mvs_internal<const DCT_MODE: u8, const LOG_PEL: usize>(
&mut self,
args: RecalculateMvsArgs<'_, T>,
) -> Result<()> {
let RecalculateMvsArgs {
out_idx,
fake_gop,
src_frame,
src_frame_data,
ref_frame,
ref_frame_data,
search_type,
search_param,
lambda,
penalty_new,
out,
field_shift,
th_sad,
dct_helper,
dct_mode,
smooth,
meander,
} = args;
// Unlike `search_mvs_internal`, the DCT helper is supplied by the caller and the
// DCT weight is fixed at 8.
self.dct = dct_helper;
self.dct_mode = Some(dct_mode);
self.dct_weight_16 = 8;
self.zero_mv_field_shifted.x = 0;
self.zero_mv_field_shifted.y = field_shift;
// The global predictor is the field-shifted zero vector with the largest possible SAD.
self.global_mv_predictor.x = 0;
self.global_mv_predictor.y = field_shift;
self.global_mv_predictor.sad = i64::MAX;
// Output record layout: a little-endian i32 byte count (header included)
// followed by one `MotionVector` per block.
let plane_data_size =
(size_of::<i32>() + self.blk_count.get() * size_of::<MotionVector>()) as i32;
semisafe_get_mut(
semisafe_get_mut(&mut out.block_data, out_idx..),
..size_of::<i32>(),
)
.copy_from_slice(&plane_data_size.to_le_bytes());
// NOTE(review): reinterprets everything after the 4-byte header as a slice of
// `MotionVector`. This presumes the resulting pointer is suitably aligned for
// `MotionVector` and that the buffer holds at least `blk_count` entries —
// confirm against how `out.block_data` is allocated and how `out_idx` is chosen.
let blk_data: &mut [MotionVector] = unsafe {
let data = semisafe_get_mut(&mut out.block_data, (out_idx + size_of::<i32>())..);
&mut *slice_from_raw_parts_mut(
data.as_mut_ptr().cast(),
data.len() / size_of::<MotionVector>(),
)
};
// Vertical start position of the first block row in each plane: the plane's
// vertical padding. The chroma rows are keyed on `yuv_mode`, not on `self.chroma`.
self.y[0] = semisafe_get(&src_frame.planes, 0).vpad as i32;
if (src_frame.yuv_mode & MVPlaneSet::UPLANE).bits() > 0 {
self.y[1] = semisafe_get(&src_frame.planes, 1).vpad as i32;
}
if (src_frame.yuv_mode & MVPlaneSet::VPLANE).bits() > 0 {
self.y[2] = semisafe_get(&src_frame.planes, 2).vpad as i32;
}
self.src_pitch[0] = semisafe_get(&src_frame.planes, 0).stride;
if self.chroma {
self.src_pitch[1] = semisafe_get(&src_frame.planes, 1).stride;
self.src_pitch[2] = semisafe_get(&src_frame.planes, 2).stride;
}
self.ref_pitch[0] = semisafe_get(&ref_frame.planes, 0).stride;
if self.chroma {
self.ref_pitch[1] = semisafe_get(&ref_frame.planes, 1).stride;
self.ref_pitch[2] = semisafe_get(&ref_frame.planes, 2).stride;
}
self.search_type = search_type;
self.search_param = search_param;
// Lambda is divided by the squared sub-pixel factor; no level scaling is applied here.
let lambda_level = lambda / (1u32 << LOG_PEL).pow(2);
// Chroma planes are only fetched when chroma is enabled; otherwise empty
// slices stand in for them.
let src_plane_y = src_frame_data.plane(0)?;
let src_plane_u: &[T] = if self.chroma {
src_frame_data.plane(1)?
} else {
&[]
};
let src_plane_v: &[T] = if self.chroma {
src_frame_data.plane(2)?
} else {
&[]
};
let ref_plane_y: &[T] = ref_frame_data.plane(0)?;
let ref_plane_u: &[T] = if self.chroma {
ref_frame_data.plane(1)?
} else {
&[]
};
let ref_plane_v: &[T] = if self.chroma {
ref_frame_data.plane(2)?
} else {
&[]
};
// Geometry of the old vector field that is being resampled onto this grid.
let plane = semisafe_get(&fake_gop.planes, 0);
let blk_x_old = plane.blk_x.get();
let blk_y_old = plane.blk_y.get();
let blk_size_x_old = plane.blk_size_x.get();
let blk_size_y_old = plane.blk_size_y.get();
let step_x_old = blk_size_x_old - plane.overlap_x;
let step_y_old = blk_size_y_old - plane.overlap_y;
let log_pel_old = (plane.pel as i32).ilog2();
for blk_y in 0..self.blk_y.get() {
self.blk_y_i = blk_y;
// Even rows, or all rows when `meander` is off, are scanned left to right;
// odd rows with `meander` on are scanned right to left.
self.blk_scan_dir = if blk_y % 2 == 0 || !meander { 1 } else { -1 };
let blk_x_start: i32 = if blk_y % 2 == 0 || !meander {
0
} else {
self.blk_x.get() as i32 - 1
};
// Horizontal start position of the row: the left padding for a forward scan,
// or the position of the last block in the row for a backward scan.
if self.blk_scan_dir == 1 {
self.x[0] = semisafe_get(&src_frame.planes, 0).hpad as i32;
if self.chroma {
self.x[1] = semisafe_get(&src_frame.planes, 1).hpad as i32;
self.x[2] = semisafe_get(&src_frame.planes, 2).hpad as i32;
}
} else {
self.x[0] = (semisafe_get(&src_frame.planes, 0).hpad
+ (self.blk_size_x.get() - self.overlap_x) * (self.blk_x.get() - 1))
as i32;
if self.chroma {
self.x[1] = (semisafe_get(&src_frame.planes, 1).hpad
+ (self.blk_size_x.get() - self.overlap_x) / self.x_ratio_uv.get() as usize
* (self.blk_x.get() - 1)) as i32;
self.x[2] = (semisafe_get(&src_frame.planes, 2).hpad
+ (self.blk_size_x.get() - self.overlap_x) / self.x_ratio_uv.get() as usize
* (self.blk_x.get() - 1)) as i32;
}
}
for iblk_x in 0..self.blk_x.get() {
// `iblk_x` counts blocks in scan order; `blk_x_i` is the actual column.
self.blk_x_i =
(blk_x_start as isize + iblk_x as isize * self.blk_scan_dir as isize) as usize;
self.blk_idx = (self.blk_y_i * self.blk_x.get()) + self.blk_x_i;
self.src_offset[0] =
semisafe_get(&src_frame.planes, 0).get_pel_pix_offset(self.x[0], self.y[0]);
if self.chroma {
self.src_offset[1] =
semisafe_get(&src_frame.planes, 1).get_pel_pix_offset(self.x[1], self.y[1]);
self.src_offset[2] =
semisafe_get(&src_frame.planes, 2).get_pel_pix_offset(self.x[2], self.y[2]);
}
self.src_pitch[0] = semisafe_get(&src_frame.planes, 0).stride;
if self.chroma {
self.src_pitch[1] = semisafe_get(&src_frame.planes, 1).stride;
self.src_pitch[2] = semisafe_get(&src_frame.planes, 2).stride;
}
// Lambda is forced to zero for the first block row.
self.lambda = if blk_y == 0 { 0 } else { lambda_level };
self.penalty_new = penalty_new;
// Motion vector limits for this block, in sub-pixel units; here they span the
// whole padded plane (no level-scaled padding term as in `search_mvs_internal`).
self.dx_max = (semisafe_get(&src_frame.planes, 0).padded_width.get() as i32
- self.x[0]
- self.blk_size_x.get() as i32)
<< LOG_PEL;
self.dy_max = (semisafe_get(&src_frame.planes, 0).padded_height.get() as i32
- self.y[0]
- self.blk_size_y.get() as i32)
<< LOG_PEL;
self.dx_min = -(self.x[0] << LOG_PEL);
self.dy_min = -(self.y[0] << LOG_PEL);
let mut vector_old = {
// Centre of the current block (padding excluded) and the old block whose
// centre lies at or before it in each direction.
let center_x = self.blk_size_x.get() / 2
+ (self.blk_size_x.get() - self.overlap_x) * self.blk_x_i;
let blk_x_old_center =
(center_x as i32 - blk_size_x_old as i32 / 2) / step_x_old as i32;
let center_y = self.blk_size_y.get() / 2
+ (self.blk_size_y.get() - self.overlap_y) * self.blk_y_i;
let blk_y_old_center =
(center_y as i32 - blk_size_y_old as i32 / 2) / step_y_old as i32;
// Distance from that old block's centre to the current centre, floored at zero.
let delta_x = max(
0,
center_x as i32
- (blk_size_x_old as i32 / 2 + step_x_old as i32 * blk_x_old_center),
);
let delta_y = max(
0,
center_y as i32
- (blk_size_y_old as i32 / 2 + step_y_old as i32 * blk_y_old_center),
);
// The old block and its right/bottom neighbour, clamped to the old grid.
let blk_x_old1 = min(blk_x_old as i32 - 1, max(0, blk_x_old_center));
let blk_x_old2 = min(blk_x_old as i32 - 1, max(0, blk_x_old_center + 1));
let blk_y_old1 = min(blk_y_old as i32 - 1, max(0, blk_y_old_center));
let blk_y_old2 = min(blk_y_old as i32 - 1, max(0, blk_y_old_center + 1));
if smooth {
// Bilinear blend of the four surrounding old vectors (and their SADs).
let vector_old1 = fake_gop
.get_block(0, (blk_x_old1 + blk_y_old1 * blk_x_old as i32) as usize)
.vector;
let vector_old2 = fake_gop
.get_block(0, (blk_x_old2 + blk_y_old1 * blk_x_old as i32) as usize)
.vector;
let vector_old3 = fake_gop
.get_block(0, (blk_x_old1 + blk_y_old2 * blk_x_old as i32) as usize)
.vector;
let vector_old4 = fake_gop
.get_block(0, (blk_x_old2 + blk_y_old2 * blk_x_old as i32) as usize)
.vector;
// Horizontal blend of the upper pair (vector1_*) and the lower pair
// (vector2_*), both still multiplied by `step_x_old`.
let vector1_x = vector_old1.x * step_x_old as i32
+ delta_x * (vector_old2.x - vector_old1.x);
let vector1_y = vector_old1.y * step_x_old as i32
+ delta_x * (vector_old2.y - vector_old1.y);
let vector1_sad = vector_old1.sad * step_x_old as i64
+ delta_x as i64 * (vector_old2.sad - vector_old1.sad);
let vector2_x = vector_old3.x * step_x_old as i32
+ delta_x * (vector_old4.x - vector_old3.x);
let vector2_y = vector_old3.y * step_x_old as i32
+ delta_x * (vector_old4.y - vector_old3.y);
let vector2_sad = vector_old3.sad * step_x_old as i64
+ delta_x as i64 * (vector_old4.sad - vector_old3.sad);
// Vertical blend, then removal of the `step_x_old` factor.
MotionVector {
x: (vector1_x + delta_y * (vector2_x - vector1_x) / step_y_old as i32)
/ step_x_old as i32,
y: (vector1_y + delta_y * (vector2_y - vector1_y) / step_y_old as i32)
/ step_x_old as i32,
sad: (vector1_sad
+ delta_y as i64 * (vector2_sad - vector1_sad) / step_y_old as i64)
/ step_x_old as i64,
}
} else {
// Nearest neighbour: pick the neighbour on each axis once the offset
// reaches half of the old step.
if delta_x * 2 < step_x_old as i32 && delta_y * 2 < step_y_old as i32 {
fake_gop
.get_block(0, (blk_x_old1 + blk_y_old1 * blk_x_old as i32) as usize)
.vector
} else if delta_x * 2 >= step_x_old as i32
&& delta_y * 2 < step_y_old as i32
{
fake_gop
.get_block(0, (blk_x_old2 + blk_y_old1 * blk_x_old as i32) as usize)
.vector
} else if delta_x * 2 < step_x_old as i32
&& delta_y * 2 >= step_y_old as i32
{
fake_gop
.get_block(0, (blk_x_old1 + blk_y_old2 * blk_x_old as i32) as usize)
.vector
} else {
fake_gop
.get_block(0, (blk_x_old2 + blk_y_old2 * blk_x_old as i32) as usize)
.vector
}
}
};
// Convert the vector from the old sub-pixel precision to the current one.
vector_old.x = (vector_old.x << LOG_PEL) >> log_pel_old;
vector_old.y = (vector_old.y << LOG_PEL) >> log_pel_old;
self.predictor = self.clip_mv(vector_old);
// Scale the old SAD by the ratio of the new block area to the old block area.
self.predictor.sad = vector_old.sad
* (self.blk_size_x.get() * self.blk_size_y.get()) as i64
/ (blk_size_x_old * blk_size_y_old) as i64;
self.best_mv = self.predictor;
let src_block_y = semisafe_get(src_plane_y, self.src_offset[0]..);
let src_block_u = if self.chroma {
semisafe_get(src_plane_u, self.src_offset[1]..)
} else {
&[]
};
let src_block_v = if self.chroma {
semisafe_get(src_plane_v, self.src_offset[2]..)
} else {
&[]
};
// SAD of the source block against the reference block at the predictor.
let sad = {
// DCT modes 1..=4 need the transformed source block in `dct_src`.
if (1..=4).contains(&DCT_MODE) {
self.dct
.as_mut()
.expect("dct helper is initialized")
.bytes_2d(
src_block_y,
self.src_pitch[0],
&mut self.dct_src,
self.dct_pitch,
)?;
}
// These DCT modes need the luma sum of the source block.
if matches!(DCT_MODE, 3 | 4 | 7 | 8 | 10) {
self.src_luma = unsafe {
(self.block_fns.luma_sum)(
src_block_y.as_ptr().cast(),
self.src_pitch[0],
)
};
}
let mut sad = self.luma_sad::<DCT_MODE>(
src_block_y,
self.src_pitch[0],
self.get_ref_block::<LOG_PEL>(
ref_frame,
ref_plane_y,
self.predictor.x,
self.predictor.y,
),
self.ref_pitch[0],
);
if self.chroma {
sad += self.chroma_sad(
src_block_u,
self.src_pitch[1],
self.get_ref_block_u::<LOG_PEL>(
ref_frame,
ref_plane_u,
self.predictor.x,
self.predictor.y,
),
self.ref_pitch[1],
);
sad += self.chroma_sad(
src_block_v,
self.src_pitch[2],
self.get_ref_block_v::<LOG_PEL>(
ref_frame,
ref_plane_v,
self.predictor.x,
self.predictor.y,
),
self.ref_pitch[2],
);
}
sad
};
self.best_mv.sad = sad as i64;
self.min_cost = sad as i64;
// Only blocks whose predictor SAD exceeds the threshold are refined.
if self.best_mv.sad > th_sad as i64 {
let src_planes = [src_block_y, src_block_u, src_block_v];
let ref_planes = [ref_plane_y, ref_plane_u, ref_plane_v];
self.refine::<DCT_MODE, LOG_PEL>(src_planes, ref_frame, ref_planes)?;
}
*semisafe_get_mut(&mut self.vectors, self.blk_idx) = self.best_mv;
*semisafe_get_mut(blk_data, self.blk_idx) = self.best_mv;
// Step to the next block in scan direction, except after the last block of the row.
if iblk_x < self.blk_x.get() - 1 {
self.x[0] +=
(self.blk_size_x.get() - self.overlap_x) as i32 * self.blk_scan_dir as i32;
if (src_frame.yuv_mode & MVPlaneSet::UPLANE).bits() > 0 {
self.x[1] += ((self.blk_size_x.get() - self.overlap_x)
>> self.log_x_ratio_uv) as i32
* self.blk_scan_dir as i32;
}
if (src_frame.yuv_mode & MVPlaneSet::VPLANE).bits() > 0 {
self.x[2] += ((self.blk_size_x.get() - self.overlap_x)
>> self.log_x_ratio_uv) as i32
* self.blk_scan_dir as i32;
}
}
}
// Step down to the next block row.
self.y[0] += (self.blk_size_y.get() - self.overlap_y) as i32;
if (src_frame.yuv_mode & MVPlaneSet::UPLANE).bits() > 0 {
self.y[1] +=
((self.blk_size_y.get() - self.overlap_y) >> self.log_y_ratio_uv) as i32;
}
if (src_frame.yuv_mode & MVPlaneSet::VPLANE).bits() > 0 {
self.y[2] +=
((self.blk_size_y.get() - self.overlap_y) >> self.log_y_ratio_uv) as i32;
}
}
Ok(())
}
/// Number of bytes needed for this plane's output record.
///
/// The base record is a `u32`-sized header plus one `MotionVector` per block. On the
/// finest level (`log_scale == 0`) with any divide mode other than
/// `DivideMode::None`, a second record holding four `MotionVector`s per block (plus
/// its own header) is added. All arithmetic saturates.
#[must_use]
pub(crate) fn get_array_size(&self, divide: DivideMode) -> NonZeroUsize {
    let header_len = size_of::<u32>();

    // SAFETY: `MotionVector` carries `x`, `y` and `sad` fields, so its size is not zero.
    let mv_len = unsafe { NonZeroUsize::new_unchecked(size_of::<MotionVector>()) };
    let base = self.blk_count.saturating_mul(mv_len).saturating_add(header_len);

    if self.log_scale != 0 || divide == DivideMode::None {
        return base;
    }

    // SAFETY: four times a non-zero size is non-zero.
    let divided_mv_len =
        unsafe { NonZeroUsize::new_unchecked(size_of::<MotionVector>() * 4) };
    let divided = self
        .blk_count
        .saturating_mul(divided_mv_len)
        .saturating_add(header_len);
    base.saturating_add(divided.get())
}
/// Estimates the global motion of the plane and writes it, doubled, into
/// `global_mv.x` / `global_mv.y` (`global_mv.sad` is left untouched).
///
/// The most frequent horizontal and vertical components are found first. The
/// estimate is then the mean of all vectors whose components both lie within 6
/// units of those values; if no vector qualifies, the most frequent values
/// themselves are used.
pub(crate) fn estimate_global_mv_doubled(&mut self, global_mv: &mut MotionVector) {
    let median_x = self.find_most_frequent(|v| v.x);
    let median_y = self.find_most_frequent(|v| v.y);

    // Accumulate in i64: with many blocks and large vectors an i32 sum can
    // overflow (wrapping in release builds, panicking in debug builds).
    let mut sum_x = 0i64;
    let mut sum_y = 0i64;
    let mut num = 0i64;
    for i in 0..self.blk_count.get() {
        let vector = semisafe_get(&self.vectors, i);
        let near_x = (i64::from(vector.x) - i64::from(median_x)).abs() < 6;
        let near_y = (i64::from(vector.y) - i64::from(median_y)).abs() < 6;
        if near_x && near_y {
            sum_x += i64::from(vector.x);
            sum_y += i64::from(vector.y);
            num += 1;
        }
    }

    if num > 0 {
        // Every averaged component is within 6 of an i32 value, so the doubled
        // mean is expected to fit back into i32 for any realistic vector range.
        global_mv.x = (2 * sum_x / num) as i32;
        global_mv.y = (2 * sum_y / num) as i32;
    } else {
        global_mv.x = 2 * median_x;
        global_mv.y = 2 * median_y;
    }
}
fn find_most_frequent<F>(&mut self, component_fn: F) -> i32
where
F: Fn(&MotionVector) -> i32,
{
self.freq_array = vec![0; self.freq_size.get()];
let mut ind_min = self.freq_size.get() - 1;
let mut ind_max = 0;
for i in 0..self.blk_count.get() {
let ind =
(self.freq_size.get() >> 1) as i32 + component_fn(semisafe_get(&self.vectors, i));
if ind >= 0 && ind < self.freq_size.get() as i32 {
let ind = ind as usize;
*semisafe_get_mut(&mut self.freq_array, ind) += 1;
if ind > ind_max {
ind_max = ind;
}
if ind < ind_min {
ind_min = ind;
}
}
}
let mut count = *semisafe_get(&self.freq_array, ind_min);
let mut index = ind_min;
for i in (ind_min + 1)..=ind_max {
if *semisafe_get(&self.freq_array, i) > count {
count = *semisafe_get(&self.freq_array, i);
index = i;
}
}
index as i32 - (self.freq_size.get() >> 1) as i32
}
pub(crate) fn interpolate_prediction(&mut self, other: &Self) {
let log_pel_1 = self.log_pel as i32;
let log_pel_2 = other.log_pel as i32;
let blk_size_x_1 = self.blk_size_x.get() as i32;
let blk_size_y_1 = self.blk_size_y.get() as i32;
let overlap_x_1 = self.overlap_x as i32;
let overlap_y_1 = self.overlap_y as i32;
let blk_x_1 = self.blk_x.get() as i32;
let blk_y_1 = self.blk_y.get() as i32;
let blk_x_2 = other.blk_x.get() as i32;
let blk_y_2 = other.blk_y.get() as i32;
let mut norm_factor = 3i32 - log_pel_1 + log_pel_2;
let mul_factor = if norm_factor < 0 { -norm_factor } else { 0 };
if norm_factor < 0 {
norm_factor = 0;
};
let normov = (blk_size_x_1 - overlap_x_1) * (blk_size_y_1 - overlap_y_1);
let aoddx = blk_size_x_1 * 3 - overlap_x_1 * 2;
let aevenx = blk_size_x_1 * 3 - overlap_x_1 * 4;
let aoddy = blk_size_y_1 * 3 - overlap_y_1 * 2;
let aeveny = blk_size_y_1 * 3 - overlap_y_1 * 4;
let scaleov = 1.0 / normov as f64;
let mut index = 0;
for l in 0..blk_y_1 {
for k in 0..blk_x_1 {
let mut vecs = [MotionVector::zero(); 4];
let mut i = k;
let mut j = l;
if i >= 2 * blk_x_2 {
i = 2 * blk_x_2 - 1;
}
if j >= 2 * blk_y_2 {
j = 2 * blk_y_2 - 1;
}
let offy = -1 + 2 * (j % 2);
let offx = -1 + 2 * (i % 2);
if (i == 0) || (i >= 2 * blk_x_2 - 1) {
if (j == 0) || (j >= 2 * blk_y_2 - 1) {
let idx = (i / 2 + (j / 2) * blk_x_2) as usize;
let new_vec = *semisafe_get(&other.vectors, idx);
vecs.iter_mut().for_each(|vec| *vec = new_vec);
} else {
let idx1 = (i / 2 + (j / 2) * blk_x_2) as usize;
let idx2 = (i / 2 + (j / 2 + offy) * blk_x_2) as usize;
let new_vec1 = *semisafe_get(&other.vectors, idx1);
let new_vec2 = *semisafe_get(&other.vectors, idx2);
vecs[0] = new_vec1;
vecs[1] = new_vec1;
vecs[2] = new_vec2;
vecs[3] = new_vec2;
}
} else if (j == 0) || (j >= 2 * blk_y_2 - 1) {
let idx1 = (i / 2 + (j / 2) * blk_x_2) as usize;
let idx2 = (i / 2 + offx + (j / 2) * blk_x_2) as usize;
let new_vec1 = *semisafe_get(&other.vectors, idx1);
let new_vec2 = *semisafe_get(&other.vectors, idx2);
vecs[0] = new_vec1;
vecs[1] = new_vec1;
vecs[2] = new_vec2;
vecs[3] = new_vec2;
} else {
let idx1 = (i / 2 + (j / 2) * blk_x_2) as usize;
let idx2 = (i / 2 + offx + (j / 2) * blk_x_2) as usize;
let idx3 = (i / 2 + (j / 2 + offy) * blk_x_2) as usize;
let idx4 = (i / 2 + offx + (j / 2 + offy) * blk_x_2) as usize;
let new_vec1 = *semisafe_get(&other.vectors, idx1);
let new_vec2 = *semisafe_get(&other.vectors, idx2);
let new_vec3 = *semisafe_get(&other.vectors, idx3);
let new_vec4 = *semisafe_get(&other.vectors, idx4);
vecs[0] = new_vec1;
vecs[1] = new_vec2;
vecs[2] = new_vec3;
vecs[3] = new_vec4;
}
let temp_sad;
if overlap_x_1 == 0 && overlap_y_1 == 0 {
let cur_vec = semisafe_get_mut(&mut self.vectors, index);
cur_vec.x = 9 * vecs[0].x + 3 * vecs[1].x + 3 * vecs[2].x + vecs[3].x;
cur_vec.y = 9 * vecs[0].y + 3 * vecs[1].y + 3 * vecs[2].y + vecs[3].y;
temp_sad =
9 * vecs[0].sad + 3 * vecs[1].sad + 3 * vecs[2].sad + vecs[3].sad + 8;
} else if overlap_x_1 <= (blk_size_x_1 >> 1) && overlap_y_1 <= (blk_size_y_1 >> 1) {
let ax1 = if offx > 0 { aoddx } else { aevenx } as i64;
let ax2 = ((blk_size_x_1 - overlap_x_1) * 4) as i64 - ax1;
let ay1 = if offy > 0 { aoddy } else { aeveny } as i64;
let ay2 = ((blk_size_y_1 - overlap_y_1) * 4) as i64 - ay1;
let a11 = ax1 * ay1;
let a12 = ax1 * ay2;
let a21 = ax2 * ay1;
let a22 = ax2 * ay2;
let cur_vec = semisafe_get_mut(&mut self.vectors, index);
cur_vec.x = ((a11 * vecs[0].x as i64
+ a21 * vecs[1].x as i64
+ a12 * vecs[2].x as i64
+ a22 * vecs[3].x as i64) as f64
* scaleov) as i32;
cur_vec.y = ((a11 * vecs[0].y as i64
+ a21 * vecs[1].y as i64
+ a12 * vecs[2].y as i64
+ a22 * vecs[3].y as i64) as f64
* scaleov) as i32;
temp_sad = ((a11 * vecs[0].sad
+ a21 * vecs[1].sad
+ a12 * vecs[2].sad
+ a22 * vecs[3].sad) as f64
* scaleov) as i64;
} else {
let cur_vec = semisafe_get_mut(&mut self.vectors, index);
cur_vec.x = (vecs[0].x + vecs[1].x + vecs[2].x + vecs[3].x) << 2;
cur_vec.y = (vecs[0].y + vecs[1].y + vecs[2].y + vecs[3].y) << 2;
temp_sad = (vecs[0].sad + vecs[1].sad + vecs[2].sad + vecs[3].sad + 2) << 2;
}
let cur_vec = semisafe_get_mut(&mut self.vectors, index);
cur_vec.x = (cur_vec.x >> norm_factor) * (1 << mul_factor);
cur_vec.y = (cur_vec.y >> norm_factor) * (1 << mul_factor);
cur_vec.sad = temp_sad >> 4;
index += 1;
}
}
}
/// Returns `v` with its `x` and `y` components clipped by [`Self::clip_mv_x`]
/// and [`Self::clip_mv_y`]; the SAD is carried over unchanged.
#[must_use]
fn clip_mv(&self, v: MotionVector) -> MotionVector {
    let MotionVector { x, y, sad } = v;
    MotionVector {
        x: self.clip_mv_x(x),
        y: self.clip_mv_y(y),
        sad,
    }
}
#[must_use]
fn clip_mv_x(&self, x: i32) -> i32 {
min(max(x, self.dx_min), self.dx_max - 1)
}
#[must_use]
fn clip_mv_y(&self, y: i32) -> i32 {
min(max(y, self.dy_min), self.dy_max - 1)
}
/// Searches for the best motion vector of the current block and stores it in
/// `self.vectors[self.blk_idx]`.
///
/// Candidates are evaluated in this order:
/// 1. the zero vector (luma at `(0, zero_mv_field_shifted.y)`, chroma at
///    `(0, 0)`), costed with `penalty_zero`;
/// 2. the clipped global predictor, costed with `penalty_global`;
/// 3. `self.predictor`, costed with its plain SAD;
/// 4. the first four entries of `self.predictors`, via `check_mv0`.
///
/// With `try_many` set, every candidate is refined on its own and the
/// cheapest refined result wins; otherwise a single refinement runs on the
/// best candidate. If the resulting SAD exceeds a threshold derived from
/// `bad_sad` and `bad_count`, a wider search around zero is attempted
/// (selected by the sign of `bad_range`), followed by a small refinement
/// around the vector that wider search produced.
///
/// # Errors
/// Propagates errors from plane access, the DCT helper and the search
/// routines.
fn pseudo_epz_search<const DCT_MODE: u8, const LOG_PEL: usize>(
    &mut self,
    src_frame_data: &FramePlanes<'_, T>,
    ref_frame: &MVFrame,
    ref_frame_data: &FramePlanes<'_, T>,
) -> Result<()> {
    let src_plane_y = src_frame_data.plane(0)?;
    let src_plane_u = if self.chroma {
        src_frame_data.plane(1)?
    } else {
        &[]
    };
    let src_plane_v = if self.chroma {
        src_frame_data.plane(2)?
    } else {
        &[]
    };
    let src_planes = [
        semisafe_get(src_plane_y, self.src_offset[0]..),
        semisafe_get(src_plane_u, self.src_offset[1]..),
        semisafe_get(src_plane_v, self.src_offset[2]..),
    ];
    let ref_plane_y: &[T] = ref_frame_data.plane(0)?;
    let ref_plane_u: &[T] = if self.chroma {
        ref_frame_data.plane(1)?
    } else {
        &[]
    };
    let ref_plane_v: &[T] = if self.chroma {
        ref_frame_data.plane(2)?
    } else {
        &[]
    };
    let ref_planes = [ref_plane_y, ref_plane_u, ref_plane_v];

    self.fetch_predictors();

    // Modes 1..=4 compare against a DCT of the source block; compute it once.
    if (1..=4).contains(&DCT_MODE) {
        self.dct.as_mut().expect("dct should exist here").bytes_2d(
            src_planes[0],
            self.src_pitch[0],
            &mut self.dct_src,
            self.dct_pitch,
        )?;
    }
    // The adaptive modes compare source and reference luma sums.
    if matches!(DCT_MODE, 3 | 4 | 7 | 8 | 10) {
        self.src_luma = unsafe {
            (self.block_fns.luma_sum)(src_planes[0].as_ptr().cast(), self.src_pitch[0])
        };
    }

    // Candidate 1: zero vector.
    self.best_mv.x = self.zero_mv_field_shifted.x;
    self.best_mv.y = self.zero_mv_field_shifted.y;
    let sad = self.candidate_sad::<DCT_MODE, LOG_PEL>(
        src_planes,
        ref_frame,
        ref_planes,
        (0, self.zero_mv_field_shifted.y),
        (0, 0),
    );
    self.best_mv.sad = sad as i64;
    self.min_cost = (sad + ((self.penalty_zero as u64 * sad) >> 8)) as i64;

    let mut best_mv_many = [MotionVector::zero(); 8];
    let mut min_cost_many = [0; 8];
    if self.try_many {
        self.refine::<DCT_MODE, LOG_PEL>(src_planes, ref_frame, ref_planes)?;
        best_mv_many[0] = self.best_mv;
        min_cost_many[0] = self.min_cost;
    }

    // Candidate 2: global predictor.
    self.global_mv_predictor = self.clip_mv(self.global_mv_predictor);
    let global_mv = (self.global_mv_predictor.x, self.global_mv_predictor.y);
    let sad = self.candidate_sad::<DCT_MODE, LOG_PEL>(
        src_planes, ref_frame, ref_planes, global_mv, global_mv,
    );
    let cost = (sad + ((self.penalty_global as u64 * sad) >> 8)) as i64;
    if cost < self.min_cost || self.try_many {
        self.best_mv.x = self.global_mv_predictor.x;
        self.best_mv.y = self.global_mv_predictor.y;
        self.best_mv.sad = sad as i64;
        self.min_cost = cost;
    }
    if self.try_many {
        self.refine::<DCT_MODE, LOG_PEL>(src_planes, ref_frame, ref_planes)?;
        best_mv_many[1] = self.best_mv;
        min_cost_many[1] = self.min_cost;
    }

    // Candidate 3: `self.predictor`, with no penalty applied.
    let predictor_mv = (self.predictor.x, self.predictor.y);
    let sad = self.candidate_sad::<DCT_MODE, LOG_PEL>(
        src_planes,
        ref_frame,
        ref_planes,
        predictor_mv,
        predictor_mv,
    );
    let cost = sad;
    if (cost as i64) < self.min_cost || self.try_many {
        self.best_mv.x = self.predictor.x;
        self.best_mv.y = self.predictor.y;
        self.best_mv.sad = sad as i64;
        self.min_cost = cost as i64;
    }
    if self.try_many {
        self.refine::<DCT_MODE, LOG_PEL>(src_planes, ref_frame, ref_planes)?;
        best_mv_many[2] = self.best_mv;
        min_cost_many[2] = self.min_cost;
    }

    // Candidate 4: the remaining predictors.
    let npred = 4;
    for i in 0..npred {
        if self.try_many {
            // Let every predictor compete from scratch.
            self.min_cost = self.very_big_sad.get() as i64 + 1;
        }
        let predictor = semisafe_get(&self.predictors, i);
        self.check_mv0::<DCT_MODE, LOG_PEL>(
            src_planes,
            ref_frame,
            ref_planes,
            predictor.x,
            predictor.y,
        )?;
        if self.try_many {
            self.refine::<DCT_MODE, LOG_PEL>(src_planes, ref_frame, ref_planes)?;
            *semisafe_get_mut(&mut best_mv_many, i + 3) = self.best_mv;
            *semisafe_get_mut(&mut min_cost_many, i + 3) = self.min_cost;
        }
    }

    if self.try_many {
        // Pick the cheapest of the individually refined candidates.
        self.min_cost = self.very_big_sad.get() as i64 + 1;
        for i in 0..(npred + 3) {
            if *semisafe_get(&min_cost_many, i) < self.min_cost {
                self.best_mv = *semisafe_get(&best_mv_many, i);
                self.min_cost = *semisafe_get(&min_cost_many, i);
            }
        }
    } else {
        self.refine::<DCT_MODE, LOG_PEL>(src_planes, ref_frame, ref_planes)?;
    }

    let found_sad = self.best_mv.sad;
    const BADCOUNT_LIMIT: u64 = 16;
    if self.blk_idx > 1
        && found_sad
            > (self.bad_sad + self.bad_sad * self.bad_count as u64 / BADCOUNT_LIMIT) as i64
    {
        // The threshold grows with `bad_count`, so repeated wide searches
        // become progressively harder to trigger.
        self.bad_count += 1;
        if self.bad_range > 0 {
            self.umh_search::<DCT_MODE, LOG_PEL>(
                src_planes,
                ref_frame,
                ref_planes,
                self.bad_range * (1 << LOG_PEL),
                0,
                0,
            )?;
        } else if self.bad_range < 0 {
            for i in (1..(-self.bad_range * (1 << LOG_PEL))).step_by(1 << LOG_PEL) {
                self.expanding_search::<DCT_MODE, LOG_PEL>(
                    src_planes,
                    ref_frame,
                    ref_planes,
                    i,
                    1 << LOG_PEL,
                    0,
                    0,
                )?;
                if self.best_mv.sad < found_sad / 4 {
                    break;
                }
            }
        }
        // Refine in a small area around the vector found so far. The centre
        // is captured once so every ring is concentric, as in the
        // `Exhaustive` arm of `refine`; re-reading `best_mv` per iteration
        // would move the centre whenever a ring improved the vector.
        let center_x = self.best_mv.x;
        let center_y = self.best_mv.y;
        for i in 1..(1 << LOG_PEL) {
            self.expanding_search::<DCT_MODE, LOG_PEL>(
                src_planes, ref_frame, ref_planes, i, 1, center_x, center_y,
            )?;
        }
    }

    *semisafe_get_mut(&mut self.vectors, self.blk_idx) = self.best_mv;
    Ok(())
}

/// Luma SAD at `luma_mv` plus, when `self.chroma` is set, the U and V SADs at
/// `chroma_mv`. Luma is evaluated first.
#[must_use]
fn candidate_sad<const DCT_MODE: u8, const LOG_PEL: usize>(
    &mut self,
    src_planes: [&[T]; 3],
    ref_frame: &MVFrame,
    ref_planes: [&[T]; 3],
    luma_mv: (i32, i32),
    chroma_mv: (i32, i32),
) -> u64 {
    let mut sad = self.luma_sad::<DCT_MODE>(
        src_planes[0],
        self.src_pitch[0],
        self.get_ref_block::<LOG_PEL>(ref_frame, ref_planes[0], luma_mv.0, luma_mv.1),
        self.ref_pitch[0],
    );
    if self.chroma {
        sad += self.chroma_sad(
            src_planes[1],
            self.src_pitch[1],
            self.get_ref_block_u::<LOG_PEL>(ref_frame, ref_planes[1], chroma_mv.0, chroma_mv.1),
            self.ref_pitch[1],
        );
        sad += self.chroma_sad(
            src_planes[2],
            self.src_pitch[2],
            self.get_ref_block_v::<LOG_PEL>(ref_frame, ref_planes[2], chroma_mv.0, chroma_mv.1),
            self.ref_pitch[2],
        );
    }
    sad
}
/// Returns the tail of the luma reference plane starting at this block's
/// position displaced by `(vx, vy)`.
///
/// The block coordinates `self.x[0]` / `self.y[0]` are multiplied by 1, 2 or
/// 4 for `LOG_PEL` 0, 1 or 2 before the vector is added; any other `LOG_PEL`
/// is unreachable.
fn get_ref_block<'a, const LOG_PEL: usize>(
    &self,
    ref_frame: &MVFrame,
    ref_plane: &'a [T],
    vx: i32,
    vy: i32,
) -> &'a [T] {
    let luma_plane = semisafe_get(&ref_frame.planes, 0);
    let (blk_x, blk_y) = (self.x[0], self.y[0]);
    let start = match LOG_PEL {
        0 => luma_plane.get_absolute_pix_offset_pel1(blk_x + vx, blk_y + vy),
        1 => luma_plane.get_absolute_pix_offset_pel2(blk_x * 2 + vx, blk_y * 2 + vy),
        2 => luma_plane.get_absolute_pix_offset_pel4(blk_x * 4 + vx, blk_y * 4 + vy),
        _ => unreachable!(),
    };
    semisafe_get(ref_plane, start..)
}
/// Returns the reference block for plane index 1 (the U plane, going by the
/// method name) displaced by the luma-scale vector `(vx, vy)`.
///
/// Thin wrapper around `get_ref_block_chroma` with `plane_idx = 1`.
fn get_ref_block_u<'a, const LOG_PEL: usize>(
&self,
ref_frame: &MVFrame,
ref_plane: &'a [T],
vx: i32,
vy: i32,
) -> &'a [T] {
self.get_ref_block_chroma::<LOG_PEL>(ref_frame, ref_plane, vx, vy, 1)
}
/// Returns the reference block for plane index 2 (the V plane, going by the
/// method name) displaced by the luma-scale vector `(vx, vy)`.
///
/// Thin wrapper around `get_ref_block_chroma` with `plane_idx = 2`.
fn get_ref_block_v<'a, const LOG_PEL: usize>(
&self,
ref_frame: &MVFrame,
ref_plane: &'a [T],
vx: i32,
vy: i32,
) -> &'a [T] {
self.get_ref_block_chroma::<LOG_PEL>(ref_frame, ref_plane, vx, vy, 2)
}
/// Returns the tail of the reference plane `plane_idx` starting at this
/// block's position displaced by the vector `(vx, vy)`.
///
/// The vector is first divided by the chroma subsampling ratio
/// (`1 << log_x_ratio_uv`, `1 << log_y_ratio_uv`). Negative components get a
/// bias before the arithmetic shift so the division rounds toward zero
/// instead of toward negative infinity.
fn get_ref_block_chroma<'a, const LOG_PEL: usize>(
    &self,
    ref_frame: &MVFrame,
    ref_plane: &'a [T],
    vx: i32,
    vy: i32,
    plane_idx: usize,
) -> &'a [T] {
    let toward_zero = |component: i32, log_ratio: u8| -> i32 {
        let bias: i32 = if component < 0 {
            (1 << log_ratio) - 1
        } else {
            0
        };
        (component + bias) >> log_ratio
    };
    let chroma_vx = toward_zero(vx, self.log_x_ratio_uv);
    let chroma_vy = toward_zero(vy, self.log_y_ratio_uv);

    let chroma_plane = semisafe_get(&ref_frame.planes, plane_idx);
    let blk_x = *semisafe_get(&self.x, plane_idx);
    let blk_y = *semisafe_get(&self.y, plane_idx);
    let start = match LOG_PEL {
        0 => chroma_plane.get_absolute_pix_offset_pel1(blk_x + chroma_vx, blk_y + chroma_vy),
        1 => chroma_plane
            .get_absolute_pix_offset_pel2(blk_x * 2 + chroma_vx, blk_y * 2 + chroma_vy),
        2 => chroma_plane
            .get_absolute_pix_offset_pel4(blk_x * 4 + chroma_vx, blk_y * 4 + chroma_vy),
        _ => unreachable!(),
    };
    semisafe_get(ref_plane, start..)
}
fn fetch_predictors(&mut self) {
if (self.blk_scan_dir == 1 && self.blk_x_i > 0)
|| (self.blk_scan_dir == -1 && self.blk_x_i < self.blk_x.get() - 1)
{
self.predictors[1] = self.clip_mv(*semisafe_get(
&self.vectors,
(self.blk_idx as isize - self.blk_scan_dir as isize) as usize,
));
} else {
self.predictors[1] = self.clip_mv(self.zero_mv_field_shifted);
}
if self.blk_y_i > 0 {
self.predictors[2] = self.clip_mv(*semisafe_get(
&self.vectors,
self.blk_idx - self.blk_x.get(),
));
} else {
self.predictors[2] = self.clip_mv(self.zero_mv_field_shifted);
}
if (self.blk_y_i < self.blk_y.get() - 1)
&& ((self.blk_scan_dir == 1 && self.blk_x_i < self.blk_x.get() - 1)
|| (self.blk_scan_dir == -1 && self.blk_x_i > 0))
{
self.predictors[3] = self.clip_mv(*semisafe_get(
&self.vectors,
((self.blk_idx + self.blk_x.get()) as isize + self.blk_scan_dir as isize) as usize,
));
} else if (self.blk_y_i > 0)
&& ((self.blk_scan_dir == 1 && self.blk_x_i < self.blk_x.get() - 1)
|| (self.blk_scan_dir == -1 && self.blk_x_i > 0))
{
self.predictors[3] = self.clip_mv(*semisafe_get(
&self.vectors,
(self.blk_idx as isize - self.blk_x.get() as isize + self.blk_scan_dir as isize)
as usize,
));
} else {
self.predictors[3] = self.clip_mv(self.zero_mv_field_shifted);
}
if self.blk_y_i > 0 {
self.predictors[0].x = median(
self.predictors[1].x,
self.predictors[2].x,
self.predictors[3].x,
);
self.predictors[0].y = median(
self.predictors[1].y,
self.predictors[2].y,
self.predictors[3].y,
);
self.predictors[0].sad = max(
self.predictors[1].sad,
max(self.predictors[2].sad, self.predictors[3].sad),
);
} else {
self.predictors[0] = self.predictors[1];
}
if self.smallest_plane {
self.predictor = self.predictors[0];
}
let scale =
self.lambda_sad as f64 / (self.lambda_sad as i64 + (self.predictor.sad >> 1)) as f64;
self.lambda = (self.lambda as f64 * scale * scale) as u32;
}
/// Computes the luma distortion between the source and reference blocks
/// according to `DCT_MODE`.
///
/// * `Spatial` / `Satd`: the plain block SAD / SATD function.
/// * `Dct`: frequency-domain SAD from `reduction_corrected_dct`.
/// * `Mixed*`: spatial SAD blended with a second metric in sixteenths, using
///   `dct_weight_16` (halved for `MixedSadEqSatdDct`).
/// * `Adaptive*`: spatial SAD, replaced by a fixed mix with a second metric
///   only when the source and reference luma sums differ by more than
///   `(src_luma + ref_luma) >> 5` (`>> 4` for `AdaptiveSatdLuma`). These
///   modes also update `self.ref_luma`.
///
/// # Panics
/// Panics if `DCT_MODE` does not convert to a `DctMode`.
#[must_use]
fn luma_sad<const DCT_MODE: u8>(
    &mut self,
    src_plane: &[T],
    src_pitch: NonZeroUsize,
    ref_plane: &[T],
    ref_pitch: NonZeroUsize,
) -> u64 {
    /// Blends two metrics with `weight` sixteenths given to `transformed`.
    fn blend16(spatial: u64, transformed: u64, weight: u64) -> u64 {
        (spatial * (16 - weight) + transformed * weight) / 16
    }

    let dct_mode = DctMode::try_from(DCT_MODE as i64).expect("invalid dct mode");
    match dct_mode {
        DctMode::Spatial => unsafe {
            (self.block_fns.luma_sad)(
                src_plane.as_ptr().cast(),
                src_pitch,
                ref_plane.as_ptr().cast(),
                ref_pitch,
            )
        },
        DctMode::Satd => unsafe {
            (self.block_fns.satd)(
                src_plane.as_ptr().cast(),
                src_pitch,
                ref_plane.as_ptr().cast(),
                ref_pitch,
            )
        },
        DctMode::Dct => self.reduction_corrected_dct(ref_plane, ref_pitch),
        DctMode::MixedSpatialDct => {
            let sad = unsafe {
                (self.block_fns.luma_sad)(
                    src_plane.as_ptr().cast(),
                    src_pitch,
                    ref_plane.as_ptr().cast(),
                    ref_pitch,
                )
            };
            // The DCT is skipped entirely when it would carry no weight.
            let dct_sad = if self.dct_weight_16 > 0 {
                self.reduction_corrected_dct(ref_plane, ref_pitch)
            } else {
                0
            };
            blend16(sad, dct_sad, self.dct_weight_16 as u64)
        }
        DctMode::MixedSatdDct => {
            let sad = unsafe {
                (self.block_fns.luma_sad)(
                    src_plane.as_ptr().cast(),
                    src_pitch,
                    ref_plane.as_ptr().cast(),
                    ref_pitch,
                )
            };
            if self.dct_weight_16 == 0 {
                return sad;
            }
            let satd = unsafe {
                (self.block_fns.satd)(
                    src_plane.as_ptr().cast(),
                    src_pitch,
                    ref_plane.as_ptr().cast(),
                    ref_pitch,
                )
            };
            blend16(sad, satd, self.dct_weight_16 as u64)
        }
        DctMode::MixedSadEqSatdDct => {
            let sad = unsafe {
                (self.block_fns.luma_sad)(
                    src_plane.as_ptr().cast(),
                    src_pitch,
                    ref_plane.as_ptr().cast(),
                    ref_pitch,
                )
            };
            if self.dct_weight_16 <= 1 {
                return sad;
            }
            let half_weight = self.dct_weight_16 as u64 / 2;
            let satd = unsafe {
                (self.block_fns.satd)(
                    src_plane.as_ptr().cast(),
                    src_pitch,
                    ref_plane.as_ptr().cast(),
                    ref_pitch,
                )
            };
            blend16(sad, satd, half_weight)
        }
        DctMode::AdaptiveSpatialMixed
        | DctMode::AdaptiveSpatialDct
        | DctMode::AdaptiveSatdMixed
        | DctMode::AdaptiveSatdDct
        | DctMode::AdaptiveSatdLuma => {
            let luma_shift = if matches!(dct_mode, DctMode::AdaptiveSatdLuma) {
                4
            } else {
                5
            };
            self.ref_luma =
                unsafe { (self.block_fns.luma_sum)(ref_plane.as_ptr().cast(), ref_pitch) };
            let sad = unsafe {
                (self.block_fns.luma_sad)(
                    src_plane.as_ptr().cast(),
                    src_pitch,
                    ref_plane.as_ptr().cast(),
                    ref_pitch,
                )
            };
            let luma_gap = (self.src_luma as i64 - self.ref_luma as i64).unsigned_abs();
            if luma_gap <= ((self.src_luma + self.ref_luma) >> luma_shift) {
                // Luma sums are close enough: keep the spatial SAD.
                return sad;
            }
            let dct_sad = if matches!(
                dct_mode,
                DctMode::AdaptiveSpatialMixed | DctMode::AdaptiveSpatialDct
            ) {
                self.bsize_corrected_dct(ref_plane, ref_pitch)
            } else {
                unsafe {
                    (self.block_fns.satd)(
                        src_plane.as_ptr().cast(),
                        src_pitch,
                        ref_plane.as_ptr().cast(),
                        ref_pitch,
                    )
                }
            };
            match dct_mode {
                DctMode::AdaptiveSpatialMixed | DctMode::AdaptiveSatdMixed => {
                    sad / 2 + dct_sad / 2
                }
                DctMode::AdaptiveSpatialDct | DctMode::AdaptiveSatdDct => {
                    sad / 4 + dct_sad / 2 + dct_sad / 4
                }
                DctMode::AdaptiveSatdLuma => sad / 2 + dct_sad / 4 + sad / 4,
                _ => unreachable!(),
            }
        }
    }
}
/// Returns the result of the configured `block_fns.chroma_sad` function for
/// the given source and reference chroma blocks.
///
/// Only the start pointers of the two slices are handed to the block
/// function together with their pitches.
// NOTE(review): the block function presumably reads a full block from each
// pointer, so both slices must cover one — confirm against `BlockFunctions`.
#[must_use]
fn chroma_sad(
&self,
src_plane: &[T],
src_pitch: NonZeroUsize,
ref_plane: &[T],
ref_pitch: NonZeroUsize,
) -> u64 {
unsafe {
(self.block_fns.chroma_sad)(
src_plane.as_ptr().cast(),
src_pitch,
ref_plane.as_ptr().cast(),
ref_pitch,
)
}
}
/// Transforms the reference block into `self.dct_ref` and returns the SAD
/// between `self.dct_src` and `self.dct_ref`, multiplied by `blk_size_x / 2`.
///
/// # Panics
/// Panics if no DCT helper is configured or if the transform fails.
#[must_use]
fn bsize_corrected_dct(&mut self, ref_plane: &[T], ref_pitch: NonZeroUsize) -> u64 {
    let helper = self.dct.as_mut().expect("dct helper should be defined");
    helper
        .bytes_2d(ref_plane, ref_pitch, &mut self.dct_ref, self.dct_pitch)
        .expect("dct should not fail with valid params");

    let freq_sad = unsafe {
        (self.block_fns.luma_sad)(
            self.dct_src.as_ptr().cast(),
            self.dct_pitch,
            self.dct_ref.as_ptr().cast(),
            self.dct_pitch,
        )
    };
    freq_sad * self.blk_size_x.get() as u64 / 2
}
/// Transforms the reference block into `self.dct_ref` and returns a
/// frequency-domain SAD with an extra penalty on the first coefficient.
///
/// The result is `(SAD(dct_src, dct_ref) + 3 * |dct_src[0] - dct_ref[0]|)
/// * blk_size_x / 2`. The block-size scaling applies to the whole sum, which
/// matches how `bsize_corrected_dct` scales its SAD. The previous expression
/// scaled only the first-coefficient term because `*` and `/` bind tighter
/// than `+`.
///
/// # Panics
/// Panics if no DCT helper is configured or if the transform fails.
#[must_use]
fn reduction_corrected_dct(&mut self, ref_plane: &[T], ref_pitch: NonZeroUsize) -> u64 {
    self.dct
        .as_mut()
        .expect("dct helper should be defined")
        .bytes_2d(ref_plane, ref_pitch, &mut self.dct_ref, self.dct_pitch)
        .expect("dct should not fail with valid params");
    let src0: i64 = semisafe_get(&self.dct_src, 0).as_();
    let ref0: i64 = semisafe_get(&self.dct_ref, 0).as_();
    let first_coeff_gap = (src0 - ref0).unsigned_abs();
    let freq_sad = unsafe {
        (self.block_fns.luma_sad)(
            self.dct_src.as_ptr().cast(),
            self.dct_pitch,
            self.dct_ref.as_ptr().cast(),
            self.dct_pitch,
        )
    };
    (freq_sad + first_coeff_gap * 3) * self.blk_size_x.get() as u64 / 2
}
/// Refines `self.best_mv` with the search strategy selected by
/// `self.search_type`, parameterised by `self.search_param`.
///
/// * `Onetime` / `Logarithmic`: repeat the one-time / diamond search with the
///   step halved each round until it reaches zero.
/// * `Nstep`, `Hex2`, `UnevenMultiHexagon`: a single call to the matching
///   search.
/// * `Exhaustive`: expanding search at radii `1..=search_param` around the
///   vector that was best on entry.
/// * `Horizontal` / `Vertical`: probes both sides of the entry vector along
///   one axis at distances `1..=search_param`.
///
/// # Errors
/// Propagates any error returned by the underlying search routines.
fn refine<const DCT_MODE: u8, const LOG_PEL: usize>(
    &mut self,
    src_planes: [&[T]; 3],
    ref_frame: &MVFrame,
    ref_planes: [&[T]; 3],
) -> Result<()> {
    let param = self.search_param;
    // param, param / 2, param / 4, ... for as long as the step is positive.
    let halving = || std::iter::successors(Some(param), |&s| Some(s / 2)).take_while(|&s| s > 0);

    match self.search_type {
        SearchType::Onetime => {
            for step in halving() {
                self.one_time_search::<DCT_MODE, LOG_PEL>(
                    src_planes, ref_frame, ref_planes, step,
                )?;
            }
        }
        SearchType::Nstep => {
            self.n_step_search::<DCT_MODE, LOG_PEL>(src_planes, ref_frame, ref_planes, param)?;
        }
        SearchType::Logarithmic => {
            for step in halving() {
                self.diamond_search::<DCT_MODE, LOG_PEL>(
                    src_planes, ref_frame, ref_planes, step,
                )?;
            }
        }
        SearchType::Exhaustive => {
            let (center_x, center_y) = (self.best_mv.x, self.best_mv.y);
            for radius in 1..=param {
                self.expanding_search::<DCT_MODE, LOG_PEL>(
                    src_planes, ref_frame, ref_planes, radius, 1, center_x, center_y,
                )?;
            }
        }
        SearchType::Hex2 => {
            self.hex2_search::<DCT_MODE, LOG_PEL>(src_planes, ref_frame, ref_planes, param)?;
        }
        SearchType::UnevenMultiHexagon => {
            let (start_x, start_y) = (self.best_mv.x, self.best_mv.y);
            self.umh_search::<DCT_MODE, LOG_PEL>(
                src_planes, ref_frame, ref_planes, param, start_x, start_y,
            )?;
        }
        SearchType::Horizontal | SearchType::Vertical => {
            // Unit step along the searched axis.
            let (ux, uy) = if matches!(self.search_type, SearchType::Horizontal) {
                (1, 0)
            } else {
                (0, 1)
            };
            let (center_x, center_y) = (self.best_mv.x, self.best_mv.y);
            for dist in 1..=param {
                self.check_mv::<DCT_MODE, LOG_PEL>(
                    src_planes,
                    ref_frame,
                    ref_planes,
                    center_x - ux * dist,
                    center_y - uy * dist,
                )?;
                self.check_mv::<DCT_MODE, LOG_PEL>(
                    src_planes,
                    ref_frame,
                    ref_planes,
                    center_x + ux * dist,
                    center_y + uy * dist,
                )?;
            }
        }
    }
    Ok(())
}
/// Tests the candidate `(vx, vy)` without the new-vector penalty and makes it
/// the best vector if its cost is below `self.min_cost`.
fn check_mv0<const DCT_MODE: u8, const LOG_PEL: usize>(
    &mut self,
    src_planes: [&[T]; 3],
    ref_frame: &MVFrame,
    ref_planes: [&[T]; 3],
    vx: i32,
    vy: i32,
) -> Result<()> {
    const FLAGS: u32 = CheckMVFlags::UPDATE_BEST_MV.bits();
    // No direction tracking is requested, so the slot is a throwaway.
    let mut ignored_direction = 0;
    self.check_mv_impl::<DCT_MODE, LOG_PEL, FLAGS>(
        src_planes,
        ref_frame,
        ref_planes,
        vx,
        vy,
        &mut ignored_direction,
        0,
    )
}
/// Tests the candidate `(vx, vy)` with the new-vector penalty applied and
/// makes it the best vector if its cost is below `self.min_cost`.
fn check_mv<const DCT_MODE: u8, const LOG_PEL: usize>(
    &mut self,
    src_planes: [&[T]; 3],
    ref_frame: &MVFrame,
    ref_planes: [&[T]; 3],
    vx: i32,
    vy: i32,
) -> Result<()> {
    const FLAGS: u32 = CheckMVFlags::PENALTY_NEW.bits() | CheckMVFlags::UPDATE_BEST_MV.bits();
    // No direction tracking is requested, so the slot is a throwaway.
    let mut ignored_direction = 0;
    self.check_mv_impl::<DCT_MODE, LOG_PEL, FLAGS>(
        src_planes,
        ref_frame,
        ref_planes,
        vx,
        vy,
        &mut ignored_direction,
        0,
    )
}
/// Tests the candidate `(vx, vy)` with the new-vector penalty applied; on
/// improvement it becomes the best vector and `*direction` is set to `val`.
fn check_mv2<const DCT_MODE: u8, const LOG_PEL: usize>(
    &mut self,
    src_planes: [&[T]; 3],
    ref_frame: &MVFrame,
    ref_planes: [&[T]; 3],
    vx: i32,
    vy: i32,
    direction: &mut i32,
    val: i32,
) -> Result<()> {
    const FLAGS: u32 = CheckMVFlags::PENALTY_NEW.bits()
        | CheckMVFlags::UPDATE_DIR.bits()
        | CheckMVFlags::UPDATE_BEST_MV.bits();
    self.check_mv_impl::<DCT_MODE, LOG_PEL, FLAGS>(
        src_planes, ref_frame, ref_planes, vx, vy, direction, val,
    )
}
/// Tests the candidate `(vx, vy)` with the new-vector penalty applied; on
/// improvement `*direction` is set to `val` and the best cost and SAD are
/// updated, but `best_mv.x` / `best_mv.y` are left untouched.
fn check_mv_dir<const DCT_MODE: u8, const LOG_PEL: usize>(
    &mut self,
    src_planes: [&[T]; 3],
    ref_frame: &MVFrame,
    ref_planes: [&[T]; 3],
    vx: i32,
    vy: i32,
    direction: &mut i32,
    val: i32,
) -> Result<()> {
    const FLAGS: u32 = CheckMVFlags::PENALTY_NEW.bits() | CheckMVFlags::UPDATE_DIR.bits();
    self.check_mv_impl::<DCT_MODE, LOG_PEL, FLAGS>(
        src_planes, ref_frame, ref_planes, vx, vy, direction, val,
    )
}
/// Shared implementation behind the `check_mv*` helpers.
///
/// The cost of `(vx, vy)` is built up in stages: motion distortion, then
/// luma SAD, then (if `self.chroma`) chroma SAD. It bails out as soon as the
/// running cost reaches `self.min_cost`. `CHECK_MV_FLAGS` selects the
/// optional behaviour:
/// * `PENALTY_NEW`: each SAD term is increased by `penalty_new / 256` of
///   itself;
/// * `UPDATE_BEST_MV`: on success `best_mv.x` / `best_mv.y` become
///   `(vx, vy)`;
/// * `UPDATE_DIR`: on success `*direction` is set to `val`.
///
/// On success `min_cost` and `best_mv.sad` are always updated. Vectors
/// rejected by `is_vector_ok` are ignored.
///
/// # Panics
/// Panics if `CHECK_MV_FLAGS` contains bits unknown to `CheckMVFlags`.
fn check_mv_impl<const DCT_MODE: u8, const LOG_PEL: usize, const CHECK_MV_FLAGS: u32>(
    &mut self,
    src_planes: [&[T]; 3],
    ref_frame: &MVFrame,
    ref_planes: [&[T]; 3],
    vx: i32,
    vy: i32,
    direction: &mut i32,
    val: i32,
) -> Result<()> {
    if !self.is_vector_ok(vx, vy) {
        return Ok(());
    }
    let distortion = self.motion_distortion(vx, vy);
    if distortion >= self.min_cost {
        return Ok(());
    }

    let flags = CheckMVFlags::from_bits(CHECK_MV_FLAGS).expect("invalid check mv flags");
    // A zero factor makes the penalty term vanish.
    let penalty_factor = if flags.contains(CheckMVFlags::PENALTY_NEW) {
        self.penalty_new as i64
    } else {
        0
    };
    let penalised = |raw: i64| raw + ((penalty_factor * raw) >> 8);

    let luma = self.luma_sad::<DCT_MODE>(
        src_planes[0],
        self.src_pitch[0],
        self.get_ref_block::<LOG_PEL>(ref_frame, ref_planes[0], vx, vy),
        self.ref_pitch[0],
    ) as i64;
    let mut cost = distortion + penalised(luma);
    if cost >= self.min_cost {
        return Ok(());
    }

    let mut chroma_total = 0i64;
    if self.chroma {
        chroma_total += self.chroma_sad(
            src_planes[1],
            self.src_pitch[1],
            self.get_ref_block_u::<LOG_PEL>(ref_frame, ref_planes[1], vx, vy),
            self.ref_pitch[1],
        ) as i64;
        chroma_total += self.chroma_sad(
            src_planes[2],
            self.src_pitch[2],
            self.get_ref_block_v::<LOG_PEL>(ref_frame, ref_planes[2], vx, vy),
            self.ref_pitch[2],
        ) as i64;
        cost += penalised(chroma_total);
        if cost >= self.min_cost {
            return Ok(());
        }
    }

    // The candidate wins: record it.
    if flags.contains(CheckMVFlags::UPDATE_BEST_MV) {
        self.best_mv.x = vx;
        self.best_mv.y = vy;
    }
    self.min_cost = cost;
    self.best_mv.sad = luma + chroma_total;
    if flags.contains(CheckMVFlags::UPDATE_DIR) {
        *direction = val;
    }
    Ok(())
}
/// One-at-a-time search with step `length`: first along x, then along y.
///
/// On each axis both neighbours of the current position are probed; if one
/// of them improves the cost, the search keeps stepping in that direction
/// until a probe fails to improve.
///
/// # Errors
/// Propagates any error from `check_mv2`.
fn one_time_search<const DCT_MODE: u8, const LOG_PEL: usize>(
    &mut self,
    src_planes: [&[T]; 3],
    ref_frame: &MVFrame,
    ref_planes: [&[T]; 3],
    length: i32,
) -> Result<()> {
    let mut direction = 0;
    // pos[0] is x, pos[1] is y.
    let mut pos = [self.best_mv.x, self.best_mv.y];

    for axis in 0..2 {
        let mut backward = pos;
        backward[axis] -= length;
        self.check_mv2::<DCT_MODE, LOG_PEL>(
            src_planes,
            ref_frame,
            ref_planes,
            backward[0],
            backward[1],
            &mut direction,
            2,
        )?;
        let mut forward = pos;
        forward[axis] += length;
        self.check_mv2::<DCT_MODE, LOG_PEL>(
            src_planes,
            ref_frame,
            ref_planes,
            forward[0],
            forward[1],
            &mut direction,
            1,
        )?;

        // 1: the forward probe won last; 2: only the backward probe won.
        let stride = match direction {
            1 => length,
            2 => -length,
            _ => continue,
        };
        while direction > 0 {
            direction = 0;
            pos[axis] += stride;
            let mut probe = pos;
            probe[axis] += stride;
            self.check_mv2::<DCT_MODE, LOG_PEL>(
                src_planes,
                ref_frame,
                ref_planes,
                probe[0],
                probe[1],
                &mut direction,
                1,
            )?;
        }
    }
    Ok(())
}
/// N-step search: for every radius from `step` down to 1, probes the eight
/// neighbours at that radius around the current best vector.
///
/// The centre is re-read from `self.best_mv` at the start of each radius, so
/// an improvement found at a larger radius moves the smaller rings.
///
/// # Errors
/// Propagates any error from `check_mv`.
fn n_step_search<const DCT_MODE: u8, const LOG_PEL: usize>(
    &mut self,
    src_planes: [&[T]; 3],
    ref_frame: &MVFrame,
    ref_planes: [&[T]; 3],
    step: i32,
) -> Result<()> {
    /// Probe order as (x sign, y sign).
    const RING: [(i32, i32); 8] = [
        (1, 1),
        (1, 0),
        (1, -1),
        (0, -1),
        (0, 1),
        (-1, 1),
        (-1, 0),
        (-1, -1),
    ];

    for length in (1..=step).rev() {
        let (center_x, center_y) = (self.best_mv.x, self.best_mv.y);
        for &(sign_x, sign_y) in &RING {
            self.check_mv::<DCT_MODE, LOG_PEL>(
                src_planes,
                ref_frame,
                ref_planes,
                center_x + sign_x * length,
                center_y + sign_y * length,
            )?;
        }
    }
    Ok(())
}
fn diamond_search<const DCT_MODE: u8, const LOG_PEL: usize>(
&mut self,
src_planes: [&[T]; 3],
ref_frame: &MVFrame,
ref_planes: [&[T]; 3],
length: i32,
) -> Result<()> {
bitflags! {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct Direction: i32 {
const RIGHT = 1;
const LEFT = 2;
const DOWN = 4;
const UP = 8;
}
}
let mut dx;
let mut dy;
let mut direction = Direction::all().bits();
let mut last_direction;
while direction > 0 {
dx = self.best_mv.x;
dy = self.best_mv.y;
last_direction = Direction::from_bits(direction).expect("valid direction");
direction = Direction::empty().bits();
if last_direction.contains(Direction::RIGHT) {
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx + length,
dy,
&mut direction,
Direction::RIGHT.bits(),
)?;
}
if last_direction.contains(Direction::LEFT) {
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx - length,
dy,
&mut direction,
Direction::LEFT.bits(),
)?;
}
if last_direction.contains(Direction::DOWN) {
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx,
dy + length,
&mut direction,
Direction::DOWN.bits(),
)?;
}
if last_direction.contains(Direction::UP) {
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx,
dy - length,
&mut direction,
Direction::UP.bits(),
)?;
}
if direction > 0 {
last_direction = Direction::from_bits(direction).expect("valid direction");
dx = self.best_mv.x;
dy = self.best_mv.y;
if last_direction.bits() & (Direction::RIGHT.bits() + Direction::LEFT.bits()) > 0 {
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx,
dy + length,
&mut direction,
Direction::DOWN.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx,
dy - length,
&mut direction,
Direction::UP.bits(),
)?;
} else {
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx + length,
dy,
&mut direction,
Direction::RIGHT.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx - length,
dy,
&mut direction,
Direction::LEFT.bits(),
)?;
}
} else {
if last_direction.bits() == Direction::RIGHT.bits() {
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx + length,
dy + length,
&mut direction,
Direction::RIGHT.bits() + Direction::DOWN.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx + length,
dy - length,
&mut direction,
Direction::RIGHT.bits() + Direction::UP.bits(),
)?;
} else if last_direction.bits() == Direction::LEFT.bits() {
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx - length,
dy + length,
&mut direction,
Direction::LEFT.bits() + Direction::DOWN.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx - length,
dy - length,
&mut direction,
Direction::LEFT.bits() + Direction::UP.bits(),
)?;
} else if last_direction.bits() == Direction::DOWN.bits() {
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx + length,
dy + length,
&mut direction,
Direction::RIGHT.bits() + Direction::DOWN.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx - length,
dy + length,
&mut direction,
Direction::LEFT.bits() + Direction::DOWN.bits(),
)?;
} else if last_direction.bits() == Direction::UP.bits() {
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx + length,
dy - length,
&mut direction,
Direction::RIGHT.bits() + Direction::UP.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx - length,
dy - length,
&mut direction,
Direction::LEFT.bits() + Direction::UP.bits(),
)?;
} else if last_direction.bits() == Direction::RIGHT.bits() + Direction::DOWN.bits()
{
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx + length,
dy + length,
&mut direction,
Direction::RIGHT.bits() + Direction::DOWN.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx - length,
dy + length,
&mut direction,
Direction::LEFT.bits() + Direction::DOWN.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx + length,
dy - length,
&mut direction,
Direction::RIGHT.bits() + Direction::UP.bits(),
)?;
} else if last_direction.bits() == Direction::LEFT.bits() + Direction::DOWN.bits() {
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx + length,
dy + length,
&mut direction,
Direction::RIGHT.bits() + Direction::DOWN.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx - length,
dy + length,
&mut direction,
Direction::LEFT.bits() + Direction::DOWN.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx - length,
dy - length,
&mut direction,
Direction::LEFT.bits() + Direction::UP.bits(),
)?;
} else if last_direction.bits() == Direction::RIGHT.bits() + Direction::UP.bits() {
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx + length,
dy + length,
&mut direction,
Direction::RIGHT.bits() + Direction::DOWN.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx - length,
dy - length,
&mut direction,
Direction::LEFT.bits() + Direction::UP.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx + length,
dy - length,
&mut direction,
Direction::RIGHT.bits() + Direction::UP.bits(),
)?;
} else if last_direction.bits() == Direction::LEFT.bits() + Direction::UP.bits() {
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx - length,
dy - length,
&mut direction,
Direction::LEFT.bits() + Direction::UP.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx - length,
dy + length,
&mut direction,
Direction::LEFT.bits() + Direction::DOWN.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx + length,
dy - length,
&mut direction,
Direction::RIGHT.bits() + Direction::UP.bits(),
)?;
} else {
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx + length,
dy + length,
&mut direction,
Direction::RIGHT.bits() + Direction::DOWN.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx - length,
dy + length,
&mut direction,
Direction::LEFT.bits() + Direction::DOWN.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx + length,
dy - length,
&mut direction,
Direction::RIGHT.bits() + Direction::UP.bits(),
)?;
self.check_mv2::<DCT_MODE, LOG_PEL>(
src_planes,
ref_frame,
ref_planes,
dx - length,
dy - length,
&mut direction,
Direction::LEFT.bits() + Direction::UP.bits(),
)?;
}
}
}
Ok(())
}
/// Hexagon search of radius 2 followed by a radius-1 expanding search.
///
/// When `i_me_range > 1`, the six hexagon points around the current best
/// vector are probed. While a probe keeps improving the cost (at most
/// `i_me_range / 2 - 1` further rounds, and only while the centre passes
/// `is_vector_ok`), the centre moves to the winning point and only the three
/// hexagon points not covered by the previous round are probed. The final
/// centre is written to `best_mv.x` / `best_mv.y`.
///
/// # Errors
/// Propagates any error from `check_mv_dir` or `expanding_search`.
fn hex2_search<const DCT_MODE: u8, const LOG_PEL: usize>(
    &mut self,
    src_planes: [&[T]; 3],
    ref_frame: &MVFrame,
    ref_planes: [&[T]; 3],
    i_me_range: i32,
) -> Result<()> {
    // (x - 1) % 6, indexed by `direction + 1`.
    const MOD6M1: [i32; 8] = [5, 0, 1, 2, 3, 4, 5, 0];
    // Hexagon offsets indexed by `direction + 1`; the first two entries are
    // repeated at the end so no modulo is needed.
    const HEX2: [[i32; 2]; 8] = [
        [-1, -2],
        [-2, 0],
        [-1, 2],
        [1, 2],
        [2, 0],
        [1, -2],
        [-1, -2],
        [-2, 0],
    ];
    // Sentinel meaning "no probe improved the cost".
    const NO_DIRECTION: i32 = -2;

    let mut direction = NO_DIRECTION;
    let mut bmx = self.best_mv.x;
    let mut bmy = self.best_mv.y;

    if i_me_range > 1 {
        // Full hexagon: directions 0..=5 use HEX2[1..=6].
        for val in 0..6 {
            let [off_x, off_y] = *semisafe_get(&HEX2, (val + 1) as usize);
            self.check_mv_dir::<DCT_MODE, LOG_PEL>(
                src_planes,
                ref_frame,
                ref_planes,
                bmx + off_x,
                bmy + off_y,
                &mut direction,
                val,
            )?;
        }

        if direction != NO_DIRECTION {
            let [step_x, step_y] = *semisafe_get(&HEX2, (direction + 1) as usize);
            bmx += step_x;
            bmy += step_y;

            for _ in 1..(i_me_range / 2) {
                if !self.is_vector_ok(bmx, bmy) {
                    break;
                }
                let odir = *semisafe_get(&MOD6M1, (direction + 1) as usize);
                direction = NO_DIRECTION;
                // Half hexagon that does not overlap the previous round.
                for k in 0..3usize {
                    let [off_x, off_y] = *semisafe_get(&HEX2, odir as usize + k);
                    self.check_mv_dir::<DCT_MODE, LOG_PEL>(
                        src_planes,
                        ref_frame,
                        ref_planes,
                        bmx + off_x,
                        bmy + off_y,
                        &mut direction,
                        odir - 1 + k as i32,
                    )?;
                }
                if direction == NO_DIRECTION {
                    break;
                }
                let [step_x, step_y] = *semisafe_get(&HEX2, (direction + 1) as usize);
                bmx += step_x;
                bmy += step_y;
            }
        }
        self.best_mv.x = bmx;
        self.best_mv.y = bmy;
    }

    self.expanding_search::<DCT_MODE, LOG_PEL>(
        src_planes, ref_frame, ref_planes, 1, 1, bmx, bmy,
    )?;
    Ok(())
}
/// Multi-stage search around the fixed centre `(omx, omy)`.
///
/// Runs, in order:
/// 1. `cross_search` from offset 1 up to `me_range` on both axes;
/// 2. the 16-point `HEX4` pattern scaled by `1, 2, ..`, up to `me_range / 4`
///    (the pattern is always probed at scale 1, even if `me_range / 4 < 1`);
/// 3. `hex2_search` with `me_range`.
///
/// The centre used for stages 1 and 2 is always `(omx, omy)`; it is not
/// re-read from `best_mv` between probes.
///
/// # Errors
/// Propagates any error returned by the called search helpers or `check_mv`.
fn umh_search<const DCT_MODE: u8, const LOG_PEL: usize>(
    &mut self,
    src_planes: [&[T]; 3],
    ref_frame: &MVFrame,
    ref_planes: [&[T]; 3],
    me_range: i32,
    omx: i32,
    omy: i32,
) -> Result<()> {
    // `[x, y]` offsets of the 16-point pattern at scale 1.
    const HEX4: [[i32; 2]; 16] = [
        [-4, 2],
        [-4, 1],
        [-4, 0],
        [-4, -1],
        [-4, -2],
        [4, -2],
        [4, -1],
        [4, 0],
        [4, 1],
        [4, 2],
        [2, 3],
        [0, 4],
        [-2, 3],
        [-2, -3],
        [0, -4],
        [2, -3],
    ];

    self.cross_search::<DCT_MODE, LOG_PEL>(
        src_planes, ref_frame, ref_planes, 1, me_range, me_range, omx, omy,
    )?;

    // At least one ring is probed, whatever `me_range` is.
    let last_scale = (me_range / 4).max(1);
    for scale in 1..=last_scale {
        for &[hex_x, hex_y] in HEX4.iter() {
            self.check_mv::<DCT_MODE, LOG_PEL>(
                src_planes,
                ref_frame,
                ref_planes,
                omx + hex_x * scale,
                omy + hex_y * scale,
            )?;
        }
    }

    self.hex2_search::<DCT_MODE, LOG_PEL>(src_planes, ref_frame, ref_planes, me_range)
}
/// Probes a cross centred on `(mvx, mvy)`.
///
/// Horizontal offsets `start, start + 2, ..` below `x_max` are checked first
/// (left, then right, for each offset), followed by vertical offsets
/// `start, start + 2, ..` below `y_max` (`mvy - offset`, then `mvy + offset`).
/// The centre itself is not probed here.
///
/// # Errors
/// Propagates the first error returned by `check_mv`.
fn cross_search<const DCT_MODE: u8, const LOG_PEL: usize>(
    &mut self,
    src_planes: [&[T]; 3],
    ref_frame: &MVFrame,
    ref_planes: [&[T]; 3],
    start: i32,
    x_max: i32,
    y_max: i32,
    mvx: i32,
    mvy: i32,
) -> Result<()> {
    // Single place that forwards a candidate vector to `check_mv`.
    let mut probe = |x: i32, y: i32| {
        self.check_mv::<DCT_MODE, LOG_PEL>(src_planes, ref_frame, ref_planes, x, y)
    };

    // Horizontal arm.
    for dx in (start..x_max).step_by(2) {
        probe(mvx - dx, mvy)?;
        probe(mvx + dx, mvy)?;
    }

    // Vertical arm.
    for dy in (start..y_max).step_by(2) {
        probe(mvx, mvy - dy)?;
        probe(mvx, mvy + dy)?;
    }

    Ok(())
}
/// Probes the outline of the square of half-width `r` centred on
/// `(mvx, mvy)`.
///
/// The two horizontal sides are sampled first, then the two vertical sides,
/// each at offsets `-r + s, -r + 2s, ..` strictly below `r` (so the corners
/// are excluded). The four corners are checked last.
///
/// # Errors
/// Propagates the first error returned by `check_mv`.
///
/// # Panics
/// Panics if `s` is 0, because `step_by` rejects a zero step.
fn expanding_search<const DCT_MODE: u8, const LOG_PEL: usize>(
    &mut self,
    src_planes: [&[T]; 3],
    ref_frame: &MVFrame,
    ref_planes: [&[T]; 3],
    r: i32,
    s: usize,
    mvx: i32,
    mvy: i32,
) -> Result<()> {
    // Single place that forwards a candidate vector to `check_mv`.
    let mut probe = |x: i32, y: i32| {
        self.check_mv::<DCT_MODE, LOG_PEL>(src_planes, ref_frame, ref_planes, x, y)
    };

    // First side offset; starting one step in keeps the corners out.
    let first = -r + s as i32;

    // Top and bottom sides.
    for off in (first..r).step_by(s) {
        probe(mvx + off, mvy - r)?;
        probe(mvx + off, mvy + r)?;
    }

    // Left and right sides.
    for off in (first..r).step_by(s) {
        probe(mvx - r, mvy + off)?;
        probe(mvx + r, mvy + off)?;
    }

    // Corners last.
    probe(mvx - r, mvy - r)?;
    probe(mvx - r, mvy + r)?;
    probe(mvx + r, mvy - r)?;
    probe(mvx + r, mvy + r)?;

    Ok(())
}
/// Returns `true` when `(vx, vy)` lies inside the half-open window
/// `dx_min..dx_max` by `dy_min..dy_max` (lower bounds inclusive, upper bounds
/// exclusive).
#[must_use]
const fn is_vector_ok(&self, vx: i32, vy: i32) -> bool {
    let x_in_window = self.dx_min <= vx && vx < self.dx_max;
    let y_in_window = self.dy_min <= vy && vy < self.dy_max;
    x_in_window && y_in_window
}
/// Cost term for the candidate vector `(vx, vy)` relative to `self.predictor`.
///
/// Computes `MotionVector::square_difference_norm` between the predictor and
/// the candidate, multiplies it by `self.lambda` in `i64`, and shifts the
/// product right by 8 bits.
#[must_use]
const fn motion_distortion(&self, vx: i32, vy: i32) -> i64 {
    // Only `x` and `y` are set from the arguments; `sad` is a placeholder 0.
    let candidate = MotionVector {
        x: vx,
        y: vy,
        sad: 0,
    };
    let dist = MotionVector::square_difference_norm(self.predictor, candidate) as i64;
    (self.lambda as i64 * dist) >> 8
}
/// Builds a byte buffer filled with placeholder motion vectors.
///
/// The buffer consists of one or two sections. Each section is a
/// little-endian `u32` header holding
/// `size_of::<u32>() + count * size_of::<MotionVector>()`, followed by
/// `count` copies of a vector with `x = 0`, `y = 0` and
/// `sad = very_big_sad`:
///
/// * the first section always has `count = blk_count`;
/// * a second section with `count = 4 * blk_count` is appended only when
///   `log_scale == 0` and `divide_extra` is not `DivideMode::None`.
///
/// The vector is pre-allocated with `get_array_size(divide_extra)` capacity.
#[must_use]
pub(crate) fn write_default_to_array(&self, divide_extra: DivideMode) -> Vec<u8> {
    let empty_mv = MotionVector {
        x: 0,
        y: 0,
        sad: self.very_big_sad.get() as i64,
    };
    let array_size = self.get_array_size(divide_extra).get();
    let mut data = Vec::with_capacity(array_size);

    // Writes one section: size header, then `mv_count` placeholder vectors.
    let append_section = |buf: &mut Vec<u8>, mv_count: usize| {
        let section_size = size_of::<u32>() + mv_count * size_of::<MotionVector>();
        buf.extend_from_slice(&(section_size as u32).to_le_bytes());
        for _ in 0..mv_count {
            buf.extend_from_slice(empty_mv.bytes());
        }
    };

    let block_total = self.blk_count.get();
    append_section(&mut data, block_total);

    // Extra section for the four sub-blocks of every block.
    if self.log_scale == 0 && divide_extra != DivideMode::None {
        append_section(&mut data, block_total * 4);
    }

    data
}
}
/// Output record pairing a validity flag with an owned byte buffer.
///
/// `SearchMvsArgs` and `RecalculateMvsArgs` both carry a `&mut MvsOutput` in
/// their `out` field.
#[derive(Debug, Clone)]
pub struct MvsOutput {
/// NOTE(review): presumably tells whether `block_data` holds usable results —
/// confirm against the code that sets it.
pub validity: bool,
/// Owned, fixed-length byte buffer.
/// NOTE(review): presumably the serialized per-block vector data — confirm
/// against the writer.
pub block_data: Box<[u8]>,
}
/// Argument bundle borrowing source/reference frames, an output record and
/// search tuning values for the lifetime `'a`.
///
/// NOTE(review): going by the name, this is the parameter pack of a
/// `search_mvs`-style routine; the consumer is outside this chunk, so the
/// per-field notes below are limited to what the types show.
struct SearchMvsArgs<'a, T> {
/// NOTE(review): presumably an index selecting where in `out` to write —
/// confirm against the consumer.
pub out_idx: usize,
/// Source frame descriptor and its plane data.
pub src_frame: &'a MVFrame,
pub src_frame_data: &'a FramePlanes<'a, T>,
/// Reference frame descriptor and its plane data.
pub ref_frame: &'a MVFrame,
pub ref_frame_data: &'a FramePlanes<'a, T>,
/// Same names and types as the `PlaneOfBlocks` fields `search_type`,
/// `search_param`, `lambda`, `lambda_sad` and `penalty_new`.
pub search_type: SearchType,
pub search_param: i32,
pub lambda: u32,
pub lambda_sad: u32,
pub penalty_new: u16,
pub penalty_level: PenaltyScaling,
/// Mutable borrow of the output record.
pub out: &'a mut MvsOutput,
/// Mutable borrow of a motion vector owned by the caller.
pub global_mv: &'a mut MotionVector,
pub field_shift: i32,
/// Mutable borrow of an `i32` owned by the caller.
pub mean_luma_change: &'a mut i32,
/// Same names and types as the `PlaneOfBlocks` fields `penalty_zero`,
/// `penalty_global`, `bad_sad`, `bad_range` and `try_many`.
pub penalty_zero: u16,
pub penalty_global: u16,
pub bad_sad: u64,
pub bad_range: i32,
pub meander: bool,
pub try_many: bool,
}
/// Argument bundle borrowing a `FakeGroupOfPlanes`, source/reference frames
/// and an output record for the lifetime `'a`, plus search tuning values.
///
/// NOTE(review): going by the name, this is the parameter pack of a
/// `recalculate_mvs`-style routine; the consumer is outside this chunk, so
/// the per-field notes below are limited to what the types show.
struct RecalculateMvsArgs<'a, T> {
/// NOTE(review): presumably an index selecting where in `out` to write —
/// confirm against the consumer.
out_idx: usize,
fake_gop: &'a FakeGroupOfPlanes,
/// Source frame descriptor and its plane data.
src_frame: &'a MVFrame,
src_frame_data: &'a FramePlanes<'a, T>,
/// Reference frame descriptor and its plane data.
ref_frame: &'a MVFrame,
ref_frame_data: &'a FramePlanes<'a, T>,
/// Same names and types as the `PlaneOfBlocks` fields `search_type`,
/// `search_param`, `lambda` and `penalty_new`.
search_type: SearchType,
search_param: i32,
lambda: u32,
penalty_new: u16,
/// Mutable borrow of the output record.
out: &'a mut MvsOutput,
field_shift: i32,
/// NOTE(review): presumably a SAD threshold — confirm against the consumer.
th_sad: u64,
/// Optional DCT helper, owned by this bundle.
dct_helper: Option<DctHelper>,
dct_mode: DctMode,
smooth: bool,
meander: bool,
}