use std::{
num::{NonZeroU8, NonZeroUsize},
sync::Arc,
};
use aligned::{A64, Aligned};
use arrayvec::ArrayVec;
use num_rational::Rational32;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use v_frame::{
chroma::ChromaSubsampling,
frame::Frame,
pixel::Pixel,
plane::{Plane, PlaneGeometry},
};
use super::importance::{
IMP_BLOCK_MV_UNITS_PER_PIXEL,
IMP_BLOCK_SIZE_IN_MV_UNITS,
IMPORTANCE_BLOCK_SIZE,
};
use crate::{
data::{
block::{BlockOffset, BlockSize, MIB_SIZE_LOG2},
frame::{ALLOWED_REF_FRAMES, FrameInvariants, FrameState, RefType},
motion::{
MEStats,
MV_LOW,
MV_UPP,
MVSamplingMode,
MotionEstimationSubsets,
MotionVector,
ReadGuardMEStats,
RefMEStats,
TileMEStats,
},
plane::{Area, AsRegion, PlaneBlockOffset, PlaneOffset, PlaneRegion, PlaneRegionMut, Rect},
prediction::PredictionMode,
sad::get_sad,
satd::get_satd,
superblock::{
MAX_SB_SIZE_LOG2,
MI_SIZE,
MI_SIZE_LOG2,
SB_SIZE,
SuperBlockOffset,
TileSuperBlockOffset,
},
tile::{TileBlockOffset, TileRect, TileStateMut, TilingInfo},
},
math::{ILog, clamp},
};
/// Builds an array of [`MotionVector`] offsets from two parallel lists of
/// field values, using the values exactly as written (no scaling).
///
/// The two lists are zipped positionally, so they must have the same length.
macro_rules! search_pattern_subpel {
    ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
        [ $(MotionVector { $field_a: $ll_a, $field_b: $ll_b } ),*]
    };
}

/// Builds an array of [`MotionVector`] offsets from two parallel lists of
/// field values, shifting every value left by 3 (multiplying it by 8).
///
/// The full-pixel search code in this file converts motion vectors back to
/// pixel positions with `/ 8`, so values written here are whole-pixel steps.
macro_rules! search_pattern {
    ($field_a:ident: [$($ll_a:expr),*], $field_b:ident: [$($ll_b:expr),*]) => {
        [ $(MotionVector { $field_a: $ll_a << 3, $field_b: $ll_b << 3 } ),*]
    };
}

/// Radius-1 diamond (down, right, up, left) in unscaled motion-vector units.
const DIAMOND_R1_PATTERN_SUBPEL: [MotionVector; 4] = search_pattern_subpel!(
    col: [ 0, 1, 0, -1],
    row: [ 1, 0, -1, 0]
);

/// Radius-1 diamond (down, right, up, left) in whole-pixel steps.
const DIAMOND_R1_PATTERN: [MotionVector; 4] = search_pattern!(
    col: [ 0, 1, 0, -1],
    row: [ 1, 0, -1, 0]
);

/// Sixteen-point ring used by the uneven multi-hexagon search.
///
/// The ring is point-symmetric about the origin: every offset `(col, row)`
/// has its mirror `(-col, -row)` in the table. Entry 13 is `(-3, -2)`; it was
/// previously `(3, -2)`, which duplicated entry 7 and left the lower-left
/// flank of the ring unsampled.
const UMH_PATTERN: [MotionVector; 16] = search_pattern!(
    col: [ -2, -1, 0, 1, 2, 3, 4, 3, 2, 1, 0, -1, -2, -3, -4, -3],
    row: [ 4, 4, 4, 4, 4, 2, 0, -2, -4, -4, -4, -4, -4, -2, 0, 2]
);

/// Six-point hexagon in whole-pixel steps. Consecutive entries are adjacent
/// vertices, which `hexagon_search` relies on when it re-checks only the
/// neighbours of the last winning vertex.
const HEXAGON_PATTERN: [MotionVector; 6] = search_pattern!(
    col: [ 0, 2, 2, 0, -2, -2],
    row: [ -2, -1, 1, 2, 1, -1]
);

/// The eight whole-pixel neighbours of a point, used as a final refinement.
const SQUARE_REFINE_PATTERN: [MotionVector; 8] = search_pattern!(
    col: [ -1, 0, 1, -1, 1, -1, 0, 1],
    row: [ 1, 1, 1, 0, 0, -1, -1, -1]
);
/// Estimates the average inter-prediction cost of `frame` against `ref_frame`.
///
/// Motion vectors are first computed for the whole frame (single tile) via
/// `compute_motion_vectors`. Then, for every importance block of the luma
/// plane, the SATD between the source block and the block of `ref_frame`
/// displaced by that block's motion vector is accumulated.
///
/// Returns the accumulated SATD divided by the number of importance blocks.
/// When the luma plane is smaller than one importance block in either
/// dimension there are no blocks, and the `0.0 / 0.0` division yields NaN.
///
/// # Panics
///
/// Panics if the inter-frame invariants cannot be created, if the tile
/// configuration is rejected, or if the motion-stats lock is poisoned.
pub(crate) fn estimate_inter_costs<T: Pixel>(
    frame: &Arc<Frame<T>>,
    ref_frame: &Arc<Frame<T>>,
    bit_depth: usize,
    frame_rate: Rational32,
    chroma_sampling: ChromaSubsampling,
    buffer: RefMEStats,
) -> f64 {
    let last_fi =
        FrameInvariants::new_key_frame(frame.y_plane.width().get(), frame.y_plane.height().get());
    #[expect(clippy::unwrap_used)]
    let fi = FrameInvariants::new_inter_frame(&last_fi, 1).unwrap();
    let mut fs = FrameState::new_with_frame_and_me_stats_and_rec(Arc::clone(frame), buffer);
    let mut tiling = TilingInfo::from_target_tiles(
        frame.y_plane.width().get(),
        frame.y_plane.height().get(),
        *frame_rate.numer() as f64 / *frame_rate.denom() as f64,
        TilingInfo::tile_log2(1, 0).expect("invalid tile_log2 count"),
        TilingInfo::tile_log2(1, 0).expect("invalid tile_log2 count"),
        chroma_sampling == ChromaSubsampling::Yuv422,
    );
    compute_motion_vectors(&fi, &mut fs, &mut tiling, bit_depth);

    let plane_org = &frame.y_plane;
    let plane_ref = &ref_frame.y_plane;
    let h_in_imp_b = plane_org.height().get() / IMPORTANCE_BLOCK_SIZE;
    let w_in_imp_b = plane_org.width().get() / IMPORTANCE_BLOCK_SIZE;
    let stats = &fs.frame_me_stats.read().expect("poisoned lock")[0];
    let bsize = BlockSize::from_width_and_height(IMPORTANCE_BLOCK_SIZE, IMPORTANCE_BLOCK_SIZE);

    let mut inter_costs: u64 = 0;
    for y in 0..h_in_imp_b {
        for x in 0..w_in_imp_b {
            // The stats grid is indexed at twice the importance-block
            // coordinates.
            let mv = stats[y * 2][x * 2].mv;

            // Position of the matching block in the reference, in MV units.
            let reference_x = x as i64 * IMP_BLOCK_SIZE_IN_MV_UNITS + mv.col as i64;
            let reference_y = y as i64 * IMP_BLOCK_SIZE_IN_MV_UNITS + mv.row as i64;

            let region_org = plane_org.region(Area::Rect(Rect {
                x: (x * IMPORTANCE_BLOCK_SIZE) as isize,
                y: (y * IMPORTANCE_BLOCK_SIZE) as isize,
                width: IMPORTANCE_BLOCK_SIZE,
                height: IMPORTANCE_BLOCK_SIZE,
            }));
            let region_ref = plane_ref.region(Area::Rect(Rect {
                x: reference_x as isize / IMP_BLOCK_MV_UNITS_PER_PIXEL as isize,
                y: reference_y as isize / IMP_BLOCK_MV_UNITS_PER_PIXEL as isize,
                width: IMPORTANCE_BLOCK_SIZE,
                height: IMPORTANCE_BLOCK_SIZE,
            }));

            inter_costs += get_satd(
                &region_org,
                &region_ref,
                bsize.width(),
                bsize.height(),
                bit_depth,
            ) as u64;
        }
    }

    inter_costs as f64 / (w_in_imp_b * h_in_imp_b) as f64
}
/// Runs `estimate_tile_motion` on every tile produced by `tiling_info`,
/// processing the tiles in parallel.
fn compute_motion_vectors<T: Pixel>(
    fi: &FrameInvariants<T>,
    fs: &mut FrameState<T>,
    tiling_info: &mut TilingInfo,
    bit_depth: usize,
) {
    // Materialise the tile contexts first so they can be handed to rayon.
    let tile_contexts: Vec<_> = tiling_info.tile_iter_mut(fs).collect();
    tile_contexts.into_par_iter().for_each(|mut tile_ctx| {
        estimate_tile_motion(fi, &mut tile_ctx.ts, bit_depth);
    });
}
/// Performs hierarchical motion estimation over one tile.
///
/// Block sizes are visited from `MIB_SIZE_LOG2` down to 2 (log2, in
/// mode-info blocks). The first pass uses subsampling level 2, the second
/// level 1 and every later pass level 0. Whenever the subsampling level
/// changes between passes, the previous pass's vectors are refined at the new
/// level before the regular search runs.
fn estimate_tile_motion<T: Pixel>(
    fi: &FrameInvariants<T>,
    ts: &mut TileStateMut<'_, T>,
    bit_depth: usize,
) {
    let coarsest_log2 = MIB_SIZE_LOG2;
    // The rate term is switched off for this search.
    let lambda = 0;
    let mut last_ssdec: Option<u8> = None;

    for mv_size_in_b_log2 in (2..=coarsest_log2).rev() {
        let level = coarsest_log2 - mv_size_in_b_log2;
        let init = level == 0;
        let ssdec = if level == 0 {
            2
        } else if level == 1 {
            1
        } else {
            0
        };
        let new_subsampling = matches!(last_ssdec, Some(prev) if prev != ssdec);
        last_ssdec = Some(ssdec);

        for sby in 0..ts.sb_height {
            for sbx in 0..ts.sb_width {
                let tile_bo =
                    TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby }).block_offset(0, 0);

                // Several reference types can map to the same buffer slot;
                // each slot is searched only once per superblock.
                let mut tested_frames_flags = 0;
                for &ref_frame in ALLOWED_REF_FRAMES {
                    let frame_flag = 1 << fi.ref_frames[ref_frame.to_index()];
                    let already_tested = tested_frames_flags & frame_flag == frame_flag;
                    tested_frames_flags |= frame_flag;
                    if already_tested {
                        continue;
                    }

                    if new_subsampling {
                        refine_subsampled_sb_motion(
                            fi,
                            ts,
                            ref_frame,
                            mv_size_in_b_log2 + 1,
                            tile_bo,
                            ssdec,
                            lambda,
                            bit_depth,
                        );
                    }
                    estimate_sb_motion(
                        fi,
                        ts,
                        ref_frame,
                        mv_size_in_b_log2,
                        tile_bo,
                        init,
                        ssdec,
                        lambda,
                        bit_depth,
                    );
                }
            }
        }
    }
}
/// Refines the stored motion vectors of one superblock at subsampling level
/// `ssdec`, walking it in blocks of `MI_SIZE << mv_size_in_b_log2` pixels.
///
/// Each block that yields a result has its vector and normalized SAD written
/// back through `save_me_stats`.
#[expect(clippy::too_many_arguments)]
fn refine_subsampled_sb_motion<T: Pixel>(
    fi: &FrameInvariants<T>,
    ts: &mut TileStateMut<'_, T>,
    ref_frame: RefType,
    mv_size_in_b_log2: usize,
    tile_bo: TileBlockOffset,
    ssdec: u8,
    lambda: u32,
    bit_depth: usize,
) {
    let luma_offset = tile_bo.to_luma_plane_offset();
    // Clip the superblock to the tile so edge superblocks are handled.
    let sb_w: usize = SB_SIZE.min(ts.width - luma_offset.x as usize);
    let sb_h: usize = SB_SIZE.min(ts.height - luma_offset.y as usize);
    let mv_size = MI_SIZE << mv_size_in_b_log2;

    for y in (0..sb_h).step_by(mv_size) {
        // Block height in subsampled pixels, rounded up.
        let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;
        for x in (0..sb_w).step_by(mv_size) {
            let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
            let sub_bo =
                tile_bo.with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);

            let Some(results) = refine_subsampled_motion_estimate(
                fi, ts, w, h, sub_bo, ref_frame, ssdec, lambda, bit_depth,
            ) else {
                continue;
            };

            // Scale the SAD so that blocks of different area are comparable.
            let normalized_sad =
                (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2)) / (w * h) as u64) as u32;
            save_me_stats(ts, mv_size_in_b_log2, sub_bo, ref_frame, MEStats {
                mv: results.mv,
                normalized_sad,
            });
        }
    }
}
/// Re-searches a small window around the motion vector already stored in
/// `ts.me_stats` for `tile_bo`, on the plane selected by `ssdec`.
///
/// `w` and `h` are the block dimensions in subsampled pixels. Returns `None`
/// when the reference slot for `ref_frame` is empty. The returned vector is
/// shifted back up by `ssdec`.
///
/// # Panics
///
/// Panics for `ssdec` values other than 0, 1 or 2, and when the tile lacks
/// the half/quarter resolution input that `ssdec` selects.
#[expect(clippy::too_many_arguments)]
fn refine_subsampled_motion_estimate<T: Pixel>(
fi: &FrameInvariants<T>,
ts: &TileStateMut<'_, T>,
w: usize,
h: usize,
tile_bo: TileBlockOffset,
ref_frame: RefType,
ssdec: u8,
lambda: u32,
bit_depth: usize,
) -> Option<MotionSearchResult> {
fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
.as_ref()
.map(|rec| {
let frame_bo = ts.to_frame_block_offset(tile_bo);
// The range is computed with the block size scaled back up by `ssdec`.
let (mvx_min, mvx_max, mvy_min, mvy_max) =
get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
// No predictor is used here: both candidates are the zero vector.
let pmv = [MotionVector { row: 0, col: 0 }; 2];
let po = frame_bo.to_luma_plane_offset();
// Bring the range and the plane offset down to the subsampled plane.
let (mvx_min, mvx_max, mvy_min, mvy_max) = (
mvx_min >> ssdec,
mvx_max >> ssdec,
mvy_min >> ssdec,
mvy_max >> ssdec,
);
let po = PlaneOffset {
x: po.x >> ssdec,
y: po.y >> ssdec,
};
// Reference plane for the chosen subsampling level.
let p_ref = match ssdec {
0 => &rec.frame.y_plane,
1 => &rec.input_hres,
2 => &rec.input_qres,
_ => unimplemented!(),
};
// Source region for the chosen subsampling level.
let org_region = &match ssdec {
0 => ts
.input_tile
.y_plane
.subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
1 => ts
.input_hres
.expect("input must have hres")
.region(Area::StartingAt { x: po.x, y: po.y }),
2 => ts
.input_qres
.expect("input must have qres")
.region(Area::StartingAt { x: po.x, y: po.y }),
_ => unimplemented!(),
};
// Start from the previously stored vector, scaled to this plane.
let mv = ts.me_stats[ref_frame.to_index()][tile_bo.0.y][tile_bo.0.x].mv >> ssdec;
// Window of -1..+2 pixels around the stored vector, clipped to the
// allowed range (`/ 8` converts MV units to pixels).
let x_lo = po.x + (mv.col as isize / 8 - 1).max(mvx_min / 8);
let x_hi = po.x + (mv.col as isize / 8 + 2).min(mvx_max / 8);
let y_lo = po.y + (mv.row as isize / 8 - 1).max(mvy_min / 8);
let y_hi = po.y + (mv.row as isize / 8 + 2).min(mvy_max / 8);
let mut results = full_search(
x_lo, x_hi, y_lo, y_hi, w, h, org_region, p_ref, po, 1, lambda, pmv, bit_depth,
);
// Convert the vector back to full-resolution units.
results.mv = results.mv << ssdec;
results
})
}
/// Computes the allowed motion-vector range for a block.
///
/// Returns `(mvx_min, mvx_max, mvy_min, mvy_max)` in motion-vector units.
/// On each axis the vector may reach the frame edge plus a border of
/// `128 + 8 * block_size`, and is further limited to the open interval
/// `(MV_LOW, MV_UPP)`.
fn get_mv_range(
    w_in_b: usize,
    h_in_b: usize,
    bo: PlaneBlockOffset,
    blk_w: usize,
    blk_h: usize,
) -> (isize, isize, isize, isize) {
    // Motion-vector units covered by one mode-info block.
    let mv_units_per_mi = (8 * MI_SIZE) as isize;

    // Same computation for both axes: frame size and position are in
    // mode-info blocks, the block extent is in pixels.
    let axis_range = |frame_in_b: usize, pos_in_b: usize, blk: usize| -> (isize, isize) {
        let border = 128 + blk as isize * 8;
        let lowest = -(pos_in_b as isize) * mv_units_per_mi - border;
        let highest = ((frame_in_b - pos_in_b) as isize - (blk / MI_SIZE) as isize)
            * mv_units_per_mi
            + border;
        (
            lowest.max(MV_LOW as isize + 1),
            highest.min(MV_UPP as isize - 1),
        )
    };

    let (mvx_min, mvx_max) = axis_range(w_in_b, bo.0.x, blk_w);
    let (mvy_min, mvy_max) = axis_range(h_in_b, bo.0.y, blk_h);
    (mvx_min, mvx_max, mvy_min, mvy_max)
}
#[expect(clippy::too_many_arguments)]
fn full_search<T: Pixel>(
x_lo: isize,
x_hi: isize,
y_lo: isize,
y_hi: isize,
w: usize,
h: usize,
org_region: &PlaneRegion<T>,
p_ref: &Plane<T>,
po: PlaneOffset,
step: usize,
lambda: u32,
pmv: [MotionVector; 2],
bit_depth: usize,
) -> MotionSearchResult {
let search_region = p_ref.region(Area::Rect(Rect {
x: x_lo,
y: y_lo,
width: (x_hi - x_lo) as usize + w,
height: (y_hi - y_lo) as usize + h,
}));
let mut best: MotionSearchResult = MotionSearchResult::empty();
for vert_window in search_region.vert_windows(h).step_by(step) {
for ref_window in vert_window.horz_windows(w).step_by(step) {
let &Rect { x, y, .. } = ref_window.rect();
let mv = MotionVector {
row: 8 * (y as i16 - po.y as i16),
col: 8 * (x as i16 - po.x as i16),
};
let rd = compute_mv_rd(
pmv,
lambda,
false,
bit_depth,
w,
h,
mv,
org_region,
&ref_window,
);
if rd.cost < best.rd.cost {
best.rd = rd;
best.mv = mv;
}
}
}
best
}
/// Computes the rate-distortion cost of one candidate motion vector.
///
/// Distortion is SATD when `use_satd` is set and SAD otherwise. The rate is
/// the cheaper of coding `cand_mv` against `pmv[0]`, or against `pmv[1]`
/// with a penalty of 1. The cost is `256 * distortion + rate * lambda`.
#[expect(clippy::too_many_arguments)]
fn compute_mv_rd<T: Pixel>(
    pmv: [MotionVector; 2],
    lambda: u32,
    use_satd: bool,
    bit_depth: usize,
    w: usize,
    h: usize,
    cand_mv: MotionVector,
    plane_org: &PlaneRegion<'_, T>,
    plane_ref: &PlaneRegion<'_, T>,
) -> MVCandidateRD {
    let distortion = match use_satd {
        true => get_satd(plane_org, plane_ref, w, h, bit_depth),
        false => get_sad(plane_org, plane_ref, w, h, bit_depth),
    };

    let [first_pmv, second_pmv] = pmv;
    let rate_first = get_mv_rate(cand_mv, first_pmv);
    let rate_second = get_mv_rate(cand_mv, second_pmv);
    let rate = rate_first.min(rate_second + 1);

    MVCandidateRD {
        cost: 256 * distortion as u64 + rate as u64 * lambda as u64,
        sad: distortion,
    }
}
/// Approximates the rate of one motion-vector component difference as twice
/// the `ilog` of the magnitude of `diff >> 1`.
fn diff_to_rate(diff: i16) -> u32 {
    let halved = diff >> 1;
    let magnitude_bits = ILog::ilog(halved.abs()) as u32;
    magnitude_bits * 2
}
/// Approximate rate of coding motion vector `a` relative to `b`: the sum of
/// the row and column component rates.
fn get_mv_rate(a: MotionVector, b: MotionVector) -> u32 {
    let row_rate = diff_to_rate(a.row - b.row);
    let col_rate = diff_to_rate(a.col - b.col);
    row_rate + col_rate
}
/// Outcome of a motion search: the chosen vector and its cost.
#[derive(Debug, Copy, Clone)]
pub struct MotionSearchResult {
    /// The selected motion vector.
    pub mv: MotionVector,
    /// Rate-distortion figures for `mv`.
    pub rd: MVCandidateRD,
}

impl MotionSearchResult {
    /// Creates a placeholder result with the default vector and the maximal
    /// ("empty") cost, so that any real candidate compares as cheaper.
    pub fn empty() -> MotionSearchResult {
        Self {
            rd: MVCandidateRD::empty(),
            mv: MotionVector::default(),
        }
    }

    /// Returns `true` while the cost is still the `u64::MAX` placeholder.
    const fn is_empty(&self) -> bool {
        matches!(self.rd.cost, u64::MAX)
    }
}
/// Rate-distortion figures for a single motion-vector candidate.
#[derive(Debug, Copy, Clone)]
pub struct MVCandidateRD {
    /// Combined rate-distortion cost; lower is better.
    pub cost: u64,
    /// Distortion of the candidate (SAD or SATD, depending on the caller).
    pub sad: u32,
}

impl MVCandidateRD {
    /// Placeholder with both fields at their maximum value.
    const fn empty() -> MVCandidateRD {
        Self {
            cost: u64::MAX,
            sad: u32::MAX,
        }
    }
}
/// Writes `stats` into every cell of the tile's motion-stats grid covered by
/// a square of `1 << mv_size_in_b_log2` blocks starting at `tile_bo`,
/// clipped to the tile's `mi_width` and `mi_height`.
fn save_me_stats<T: Pixel>(
    ts: &mut TileStateMut<'_, T>,
    mv_size_in_b_log2: usize,
    tile_bo: TileBlockOffset,
    ref_frame: RefType,
    stats: MEStats,
) {
    let size_in_b = 1 << mv_size_in_b_log2;
    let x_start = tile_bo.0.x;
    let y_start = tile_bo.0.y;
    let x_end = (x_start + size_in_b).min(ts.mi_width);
    let y_end = (y_start + size_in_b).min(ts.mi_height);

    let tile_me_stats = &mut ts.me_stats[ref_frame.to_index()];
    for mi_y in y_start..y_end {
        tile_me_stats[mi_y][x_start..x_end]
            .iter_mut()
            .for_each(|slot| *slot = stats);
    }
}
/// Runs motion estimation for every block of one superblock at the given
/// block size and subsampling level, storing each result via
/// `save_me_stats`.
///
/// When `init` is set, blocks are searched in `MVSamplingMode::INIT` with an
/// extensive search. Otherwise each block is searched in `CORNER` mode, with
/// `right` / `bottom` set when the block's x / y offset has the `mv_size`
/// bit set.
#[expect(clippy::too_many_arguments)]
fn estimate_sb_motion<T: Pixel>(
    fi: &FrameInvariants<T>,
    ts: &mut TileStateMut<'_, T>,
    ref_frame: RefType,
    mv_size_in_b_log2: usize,
    tile_bo: TileBlockOffset,
    init: bool,
    ssdec: u8,
    lambda: u32,
    bit_depth: usize,
) {
    let luma_offset = tile_bo.to_luma_plane_offset();
    // Clip the superblock to the tile so edge superblocks are handled.
    let sb_w: usize = SB_SIZE.min(ts.width - luma_offset.x as usize);
    let sb_h: usize = SB_SIZE.min(ts.height - luma_offset.y as usize);
    let mv_size = MI_SIZE << mv_size_in_b_log2;

    for y in (0..sb_h).step_by(mv_size) {
        let bottom = y & mv_size == mv_size;
        // Block height in subsampled pixels, rounded up.
        let h = mv_size.min(sb_h - y + (1 << ssdec) - 1) >> ssdec;

        for x in (0..sb_w).step_by(mv_size) {
            let corner: MVSamplingMode = match init {
                true => MVSamplingMode::INIT,
                false => MVSamplingMode::CORNER {
                    right: x & mv_size == mv_size,
                    bottom,
                },
            };
            let w = mv_size.min(sb_w - x + (1 << ssdec) - 1) >> ssdec;
            let sub_bo =
                tile_bo.with_offset(x as isize >> MI_SIZE_LOG2, y as isize >> MI_SIZE_LOG2);

            let Some(results) = estimate_motion(
                fi,
                ts,
                w,
                h,
                sub_bo,
                ref_frame,
                None,
                corner,
                init,
                ssdec,
                Some(lambda),
                bit_depth,
            ) else {
                continue;
            };

            // Scale the SAD so that blocks of different area are comparable.
            let normalized_sad =
                (((results.rd.sad as u64) << (MAX_SB_SIZE_LOG2 * 2)) / (w * h) as u64) as u32;
            save_me_stats(ts, mv_size_in_b_log2, sub_bo, ref_frame, MEStats {
                mv: results.mv,
                normalized_sad,
            });
        }
    }
}
/// Estimates the motion vector of one block against `ref_frame`.
///
/// `w` and `h` are the block dimensions on the plane selected by `ssdec`
/// (0 = full resolution, 1 = `hres`, 2 = `qres`). A full-pixel search is
/// always run; when `pmv` is `Some`, the winner is re-scored with SATD and
/// then refined by `sub_pixel_me`. A missing `lambda` is treated as 0.
///
/// Returns `None` when the reference slot for `ref_frame` is empty. The
/// returned vector is shifted back up by `ssdec`.
///
/// # Panics
///
/// Panics for `ssdec` values other than 0, 1 or 2, and when the tile lacks
/// the half/quarter resolution input that `ssdec` selects.
#[expect(clippy::too_many_arguments)]
fn estimate_motion<T: Pixel>(
fi: &FrameInvariants<T>,
ts: &TileStateMut<'_, T>,
w: usize,
h: usize,
tile_bo: TileBlockOffset,
ref_frame: RefType,
pmv: Option<[MotionVector; 2]>,
corner: MVSamplingMode,
extensive_search: bool,
ssdec: u8,
lambda: Option<u32>,
bit_depth: usize,
) -> Option<MotionSearchResult> {
fi.rec_buffer.frames[fi.ref_frames[ref_frame.to_index()] as usize]
.as_ref()
.map(|rec| {
let frame_bo = ts.to_frame_block_offset(tile_bo);
// The range is computed with the block size scaled back up by `ssdec`.
let (mvx_min, mvx_max, mvy_min, mvy_max) =
get_mv_range(fi.w_in_b, fi.h_in_b, frame_bo, w << ssdec, h << ssdec);
let lambda = lambda.unwrap_or(0);
// Fallback predictor pair used when the caller supplies none.
let global_mv = [MotionVector { row: 0, col: 0 }; 2];
let po = frame_bo.to_luma_plane_offset();
// Bring the range and the plane offset down to the subsampled plane.
let (mvx_min, mvx_max, mvy_min, mvy_max) = (
mvx_min >> ssdec,
mvx_max >> ssdec,
mvy_min >> ssdec,
mvy_max >> ssdec,
);
let po = PlaneOffset {
x: po.x >> ssdec,
y: po.y >> ssdec,
};
// Reference plane for the chosen subsampling level.
let p_ref = match ssdec {
0 => &rec.frame.y_plane,
1 => &rec.input_hres,
2 => &rec.input_qres,
_ => unimplemented!(),
};
// Source region for the chosen subsampling level.
let org_region = &match ssdec {
0 => ts
.input_tile
.y_plane
.subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
1 => ts
.input_hres
.expect("input must have hres")
.region(Area::StartingAt { x: po.x, y: po.y }),
2 => ts
.input_qres
.expect("input must have qres")
.region(Area::StartingAt { x: po.x, y: po.y }),
_ => unimplemented!(),
};
let mut best: MotionSearchResult = full_pixel_me(
fi,
ts,
org_region,
p_ref,
tile_bo,
po,
lambda,
pmv.unwrap_or(global_mv),
w,
h,
mvx_min,
mvx_max,
mvy_min,
mvy_max,
ref_frame,
corner,
extensive_search,
ssdec,
bit_depth,
);
// Sub-pixel refinement only happens when a predictor was supplied.
if let Some(pmv) = pmv {
// Re-score the full-pixel winner with SATD (`use_satd = true`) so its
// cost is comparable with the sub-pixel candidates scored below.
best.rd = get_fullpel_mv_rd(
po, org_region, p_ref, bit_depth, pmv, lambda, true, mvx_min, mvx_max, mvy_min,
mvy_max, w, h, best.mv,
);
sub_pixel_me(
fi, po, org_region, p_ref, lambda, pmv, mvx_min, mvx_max, mvy_min, mvy_max, w,
h, true, &mut best, ref_frame, bit_depth,
);
}
// Convert the vector back to full-resolution units.
best.mv = best.mv << ssdec;
best
})
}
/// Full-pixel motion search for one block.
///
/// Predictor candidates are gathered by `get_subset_predictors`. Without
/// `extensive_search`, all predictors are tried at once (best predictor
/// followed by a diamond search). With `extensive_search`, the median,
/// `subset_b` and `subset_c` are tried in that order, returning early as soon
/// as the best SAD drops below a threshold; if none does, an uneven
/// multi-hexagon search with range 24 runs from the best result so far.
#[expect(clippy::too_many_arguments)]
fn full_pixel_me<T: Pixel>(
fi: &FrameInvariants<T>,
ts: &TileStateMut<'_, T>,
org_region: &PlaneRegion<T>,
p_ref: &Plane<T>,
tile_bo: TileBlockOffset,
po: PlaneOffset,
lambda: u32,
pmv: [MotionVector; 2],
w: usize,
h: usize,
mvx_min: isize,
mvx_max: isize,
mvy_min: isize,
mvy_max: isize,
ref_frame: RefType,
corner: MVSamplingMode,
extensive_search: bool,
ssdec: u8,
bit_depth: usize,
) -> MotionSearchResult {
let ref_frame_id = ref_frame.to_index();
let tile_me_stats = &ts.me_stats[ref_frame_id].as_const();
// Motion stats stored with the reference in slot `fi.ref_frames[0]`, if
// that slot is populated; the read guard is held while predictors are read.
let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize]
.as_ref()
.map(|frame_ref| frame_ref.frame_me_stats.read().expect("poisoned lock"));
let subsets = get_subset_predictors(
tile_bo,
tile_me_stats,
frame_ref,
ref_frame_id,
w,
h,
mvx_min,
mvx_max,
mvy_min,
mvy_max,
corner,
ssdec,
);
// Evaluates a set of predictors: picks the cheapest, refines it with a
// diamond search, and keeps it if it beats the running best.
let try_cands = |predictors: &[MotionVector], best: &mut MotionSearchResult| {
let mut results = get_best_predictor(
po, org_region, p_ref, predictors, bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min,
mvy_max, w, h,
);
fullpel_diamond_search(
po,
org_region,
p_ref,
&mut results,
bit_depth,
pmv,
lambda,
mvx_min,
mvx_max,
mvy_min,
mvy_max,
w,
h,
);
if results.rd.cost < best.rd.cost {
*best = results;
}
};
let mut best: MotionSearchResult = MotionSearchResult::empty();
if !extensive_search {
try_cands(&subsets.all_mvs(), &mut best);
best
} else {
// Early-exit threshold: 1.2x the smallest predictor SAD plus one unit per
// pixel, scaled up for bit depths above 8.
let thresh = (subsets.min_sad as f32 * 1.2) as u32 + (((w * h) as u32) << (bit_depth - 8));
if let Some(median) = subsets.median {
try_cands(&[median], &mut best);
if best.rd.sad < thresh {
return best;
}
}
try_cands(&subsets.subset_b, &mut best);
if best.rd.sad < thresh {
return best;
}
try_cands(&subsets.subset_c, &mut best);
if best.rd.sad < thresh {
return best;
}
// None of the predictor sets was good enough: fall back to the wider
// pattern search, starting from the best result found so far.
uneven_multi_hex_search(
po, org_region, p_ref, &mut best, bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min,
mvy_max, w, h, 24,
);
best
}
}
/// Sub-pixel refinement entry point: refines `best` in place by delegating to
/// `subpel_diamond_search` with the same arguments.
#[expect(clippy::too_many_arguments)]
fn sub_pixel_me<T: Pixel>(
    fi: &FrameInvariants<T>,
    po: PlaneOffset,
    org_region: &PlaneRegion<T>,
    p_ref: &Plane<T>,
    lambda: u32,
    pmv: [MotionVector; 2],
    mvx_min: isize,
    mvx_max: isize,
    mvy_min: isize,
    mvy_max: isize,
    w: usize,
    h: usize,
    use_satd: bool,
    best: &mut MotionSearchResult,
    ref_frame: RefType,
    bit_depth: usize,
) {
    subpel_diamond_search(
        fi,
        po,
        org_region,
        p_ref,
        bit_depth,
        pmv,
        lambda,
        mvx_min,
        mvx_max,
        mvy_min,
        mvy_max,
        w,
        h,
        use_satd,
        best,
        ref_frame,
    );
}
#[expect(clippy::too_many_arguments)]
fn subpel_diamond_search<T: Pixel>(
fi: &FrameInvariants<T>,
po: PlaneOffset,
org_region: &PlaneRegion<T>,
_p_ref: &Plane<T>,
bit_depth: usize,
pmv: [MotionVector; 2],
lambda: u32,
mvx_min: isize,
mvx_max: isize,
mvy_min: isize,
mvy_max: isize,
w: usize,
h: usize,
use_satd: bool,
current: &mut MotionSearchResult,
ref_frame: RefType,
) {
let mc_w = w.next_power_of_two();
let mc_h = (h + 1) & !1;
let cfg = PlaneGeometry {
width: NonZeroUsize::new(mc_w).expect("width must not be zero"),
height: NonZeroUsize::new(mc_h).expect("height must not be zero"),
stride: NonZeroUsize::new(mc_w).expect("stride must not be zero"),
pad_left: 0,
pad_right: 0,
pad_top: 0,
pad_bottom: 0,
subsampling_x: NonZeroU8::new(1).expect("non-zero const"),
subsampling_y: NonZeroU8::new(1).expect("non-zero const"),
};
let mut buf: Aligned<A64, [T; 128 * 128]> = Aligned([T::zero(); 128 * 128]);
let mut tmp_region = PlaneRegionMut::from_slice(buf.as_mut(), cfg, Rect {
x: 0,
y: 0,
width: cfg.width.get(),
height: cfg.height.get(),
});
let (mut diamond_radius_log2, diamond_radius_end_log2) = (2u8, 1u8);
loop {
let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
for &offset in &DIAMOND_R1_PATTERN_SUBPEL {
let cand_mv = current.mv + (offset << diamond_radius_log2);
let rd = get_subpel_mv_rd(
fi,
po,
org_region,
bit_depth,
pmv,
lambda,
use_satd,
mvx_min,
mvx_max,
mvy_min,
mvy_max,
w,
h,
cand_mv,
&mut tmp_region,
ref_frame,
);
if rd.cost < best_cand.rd.cost {
best_cand.mv = cand_mv;
best_cand.rd = rd;
}
}
if current.rd.cost <= best_cand.rd.cost {
if diamond_radius_log2 == diamond_radius_end_log2 {
break;
} else {
diamond_radius_log2 -= 1;
}
} else {
*current = best_cand;
}
}
assert!(!current.is_empty());
}
#[expect(clippy::too_many_arguments)]
fn get_subpel_mv_rd<T: Pixel>(
fi: &FrameInvariants<T>,
po: PlaneOffset,
org_region: &PlaneRegion<T>,
bit_depth: usize,
pmv: [MotionVector; 2],
lambda: u32,
use_satd: bool,
mvx_min: isize,
mvx_max: isize,
mvy_min: isize,
mvy_max: isize,
w: usize,
h: usize,
cand_mv: MotionVector,
tmp_region: &mut PlaneRegionMut<T>,
ref_frame: RefType,
) -> MVCandidateRD {
if (cand_mv.col as isize) < mvx_min
|| (cand_mv.col as isize) > mvx_max
|| (cand_mv.row as isize) < mvy_min
|| (cand_mv.row as isize) > mvy_max
{
return MVCandidateRD::empty();
}
let tmp_width = tmp_region.rect().width;
let tmp_height = tmp_region.rect().height;
let tile_rect = TileRect {
x: 0,
y: 0,
width: tmp_width,
height: tmp_height,
};
PredictionMode::NEWMV.predict_inter_single(
fi, tile_rect, 0, po, tmp_region,
tmp_width, tmp_height, ref_frame, cand_mv, bit_depth,
);
let plane_ref = tmp_region.as_const();
compute_mv_rd(
pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region, &plane_ref,
)
}
/// Uneven multi-hexagon search around `current`, updating it in place.
///
/// Runs four stages in order, each scoring candidates with
/// `get_fullpel_mv_rd` (SAD) and keeping any that is cheaper than `current`:
/// two line searches from the starting vector, a 5x5 grid around the best so
/// far, and `me_range >> 2` scaled copies of `UMH_PATTERN`. It finishes with
/// `hexagon_search`.
///
/// # Panics
///
/// Panics if `current` is empty on entry.
#[expect(clippy::too_many_arguments)]
fn uneven_multi_hex_search<T: Pixel>(
po: PlaneOffset,
org_region: &PlaneRegion<T>,
p_ref: &Plane<T>,
current: &mut MotionSearchResult,
bit_depth: usize,
pmv: [MotionVector; 2],
lambda: u32,
mvx_min: isize,
mvx_max: isize,
mvy_min: isize,
mvy_max: isize,
w: usize,
h: usize,
me_range: i16,
) {
assert!(!current.is_empty());
// Both line searches are centred on the starting vector, not on whatever
// `current` becomes while they run.
let center = current.mv;
// Long line: odd multiples 1, 3, ... up to `me_range`.
for i in (1..=me_range).step_by(2) {
// NOTE(review): despite its name this pattern has `col = 0` and varies
// `row`, i.e. it steps along the row axis — confirm the naming intent.
const HORIZONTAL_LINE: [MotionVector; 2] = search_pattern!(
col: [ 0, 0],
row: [-1, 1]
);
for &offset in &HORIZONTAL_LINE {
let cand_mv = center + offset * i;
let rd = get_fullpel_mv_rd(
po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, mvx_max, mvy_min,
mvy_max, w, h, cand_mv,
);
if rd.cost < current.rd.cost {
current.mv = cand_mv;
current.rd = rd;
}
}
}
// Short line: odd multiples up to half of `me_range`.
for i in (1..=me_range >> 1).step_by(2) {
// NOTE(review): despite its name this pattern has `row = 0` and varies
// `col`, i.e. it steps along the column axis — confirm the naming intent.
const VERTICAL_LINE: [MotionVector; 2] = search_pattern!(
col: [-1, 1],
row: [ 0, 0]
);
for &offset in &VERTICAL_LINE {
let cand_mv = center + offset * i;
let rd = get_fullpel_mv_rd(
po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, mvx_max, mvy_min,
mvy_max, w, h, cand_mv,
);
if rd.cost < current.rd.cost {
current.mv = cand_mv;
current.rd = rd;
}
}
}
// Recentre on the best vector found by the line searches.
let center = current.mv;
// 5x5 grid of offsets around the centre, skipping the centre itself.
// NOTE(review): these offsets are added without the `<< 3` scaling that
// `search_pattern!` applies to every other pattern in this function, while
// `get_fullpel_mv_rd` converts vectors to pixels with `/ 8` — confirm
// whether whole-pixel steps were intended here.
for row in -2..=2 {
for col in -2..=2 {
if row == 0 && col == 0 {
continue;
}
let cand_mv = center + MotionVector { row, col };
let rd = get_fullpel_mv_rd(
po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, mvx_max, mvy_min,
mvy_max, w, h, cand_mv,
);
if rd.cost < current.rd.cost {
current.mv = cand_mv;
current.rd = rd;
}
}
}
// Recentre again, then test the 16-point pattern at growing scales.
let center = current.mv;
let iterations = me_range >> 2;
for i in 1..=iterations {
for &offset in &UMH_PATTERN {
let cand_mv = center + offset * i;
let rd = get_fullpel_mv_rd(
po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, mvx_max, mvy_min,
mvy_max, w, h, cand_mv,
);
if rd.cost < current.rd.cost {
current.mv = cand_mv;
current.rd = rd;
}
}
}
// Final local refinement around the best vector found.
hexagon_search(
po, org_region, p_ref, current, bit_depth, pmv, lambda, mvx_min, mvx_max, mvy_min, mvy_max,
w, h,
);
}
/// Collects motion-vector predictors for a block from already-computed
/// motion stats.
///
/// `subset_b` holds vectors read from the current tile's stats around the
/// block, plus the zero vector. `subset_c` holds vectors read from
/// `frame_ref_opt` (when present) around the same position. Every vector read
/// from stats is quantized to full-pel, clamped to the given range, and
/// finally shifted down by `ssdec`. `min_sad` is the smallest normalized SAD
/// among the stats that were read, rescaled to the area `pix_w * pix_h`.
///
/// `pix_w` and `pix_h` are the block dimensions on the subsampled plane.
#[expect(clippy::too_many_arguments)]
fn get_subset_predictors(
tile_bo: TileBlockOffset,
tile_me_stats: &TileMEStats<'_>,
frame_ref_opt: Option<ReadGuardMEStats<'_>>,
ref_frame_id: usize,
pix_w: usize,
pix_h: usize,
mvx_min: isize,
mvx_max: isize,
mvy_min: isize,
mvy_max: isize,
corner: MVSamplingMode,
ssdec: u8,
) -> MotionEstimationSubsets {
let mut min_sad: u32 = u32::MAX;
// Capacity 5: up to four neighbours plus the zero vector.
let mut subset_b = ArrayVec::<MotionVector, 5>::new();
// Capacity 5: up to four neighbours plus the co-located centre.
let mut subset_c = ArrayVec::<MotionVector, 5>::new();
// Block size in mode-info blocks at full resolution, rounded up.
let w = ((pix_w << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
let h = ((pix_h << ssdec) + MI_SIZE - 1) >> MI_SIZE_LOG2;
// Half-block offsets, clipped so the index stays inside the tile's grid.
let clipped_half_w = (w >> 1).min(tile_me_stats.cols() - 1 - tile_bo.0.x);
let clipped_half_h = (h >> 1).min(tile_me_stats.rows() - 1 - tile_bo.0.y);
// Records the candidate's SAD and returns its vector, quantized to
// full-pel and clamped to the allowed range.
let mut process_cand = |stats: MEStats| -> MotionVector {
min_sad = min_sad.min(stats.normalized_sad);
let mv = stats.mv.quantize_to_fullpel();
MotionVector {
col: clamp(mv.col as isize, mvx_min, mvx_max) as i16,
row: clamp(mv.row as isize, mvy_min, mvy_max) as i16,
}
};
// Left neighbour.
if tile_bo.0.x > 0 {
subset_b.push(process_cand(
tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x - 1],
));
}
// Top neighbour.
if tile_bo.0.y > 0 {
subset_b.push(process_cand(
tile_me_stats[tile_bo.0.y - 1][tile_bo.0.x + clipped_half_w],
));
}
// Right neighbour: only in CORNER mode with `right` set, and only when it
// lies inside the grid.
if let MVSamplingMode::CORNER {
right: true,
bottom: _,
} = corner
&& tile_bo.0.x + w < tile_me_stats.cols()
{
subset_b.push(process_cand(
tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + w],
));
}
// Bottom neighbour: only in CORNER mode with `bottom` set, and only when
// it lies inside the grid.
if let MVSamplingMode::CORNER {
right: _,
bottom: true,
} = corner
&& tile_bo.0.y + h < tile_me_stats.rows()
{
subset_b.push(process_cand(
tile_me_stats[tile_bo.0.y + h][tile_bo.0.x + clipped_half_w],
));
}
// Outside INIT mode the "median" is simply the stats entry at the block's
// centre. In INIT mode it is the component-wise median of the neighbours,
// available only when exactly three were collected.
let median = if corner != MVSamplingMode::INIT {
Some(process_cand(
tile_me_stats[tile_bo.0.y + clipped_half_h][tile_bo.0.x + clipped_half_w],
))
} else if subset_b.len() != 3 {
None
} else {
let mut rows: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.row).collect();
let mut cols: ArrayVec<i16, 3> = subset_b.iter().map(|&a| a.col).collect();
rows.as_mut_slice().sort_unstable();
cols.as_mut_slice().sort_unstable();
Some(MotionVector {
row: rows[1],
col: cols[1],
})
};
// The zero vector is always a candidate.
subset_b.push(MotionVector::default());
// Candidates from the reference's stored stats, addressed in frame
// coordinates (tile origin plus tile-relative offset).
if let Some(frame_me_stats) = frame_ref_opt {
let prev_frame = &frame_me_stats[ref_frame_id];
let frame_bo = PlaneBlockOffset(BlockOffset {
x: tile_me_stats.x() + tile_bo.0.x,
y: tile_me_stats.y() + tile_bo.0.y,
});
let clipped_half_w = (w >> 1).min(prev_frame.cols - 1 - frame_bo.0.x);
let clipped_half_h = (h >> 1).min(prev_frame.rows - 1 - frame_bo.0.y);
// Left.
if frame_bo.0.x > 0 {
subset_c.push(process_cand(
prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x - 1],
));
}
// Top.
if frame_bo.0.y > 0 {
subset_c.push(process_cand(
prev_frame[frame_bo.0.y - 1][frame_bo.0.x + clipped_half_w],
));
}
// Right.
if frame_bo.0.x + w < prev_frame.cols {
subset_c.push(process_cand(
prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + w],
));
}
// Bottom.
if frame_bo.0.y + h < prev_frame.rows {
subset_c.push(process_cand(
prev_frame[frame_bo.0.y + h][frame_bo.0.x + clipped_half_w],
));
}
// Co-located centre.
subset_c.push(process_cand(
prev_frame[frame_bo.0.y + clipped_half_h][frame_bo.0.x + clipped_half_w],
));
}
// Undo the SAD normalization for a block of `pix_w * pix_h` pixels.
let min_sad = ((min_sad as u64 * (pix_w * pix_h) as u64) >> (MAX_SB_SIZE_LOG2 * 2)) as u32;
// Scale all predictors down to the subsampled plane.
let dec_mv = |mv: MotionVector| MotionVector {
col: mv.col >> ssdec,
row: mv.row >> ssdec,
};
let median = median.map(dec_mv);
for mv in subset_b.iter_mut() {
*mv = dec_mv(*mv);
}
for mv in subset_c.iter_mut() {
*mv = dec_mv(*mv);
}
MotionEstimationSubsets {
min_sad,
median,
subset_b,
subset_c,
}
}
#[expect(clippy::too_many_arguments)]
fn get_best_predictor<T: Pixel>(
po: PlaneOffset,
org_region: &PlaneRegion<T>,
p_ref: &Plane<T>,
predictors: &[MotionVector],
bit_depth: usize,
pmv: [MotionVector; 2],
lambda: u32,
mvx_min: isize,
mvx_max: isize,
mvy_min: isize,
mvy_max: isize,
w: usize,
h: usize,
) -> MotionSearchResult {
let mut best: MotionSearchResult = MotionSearchResult::empty();
for &init_mv in predictors.iter() {
let rd = get_fullpel_mv_rd(
po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, mvx_max, mvy_min,
mvy_max, w, h, init_mv,
);
if rd.cost < best.rd.cost {
best.mv = init_mv;
best.rd = rd;
}
}
best
}
/// Scores one full-pixel candidate vector.
///
/// Returns `MVCandidateRD::empty()` when `cand_mv` lies outside the allowed
/// range. Otherwise the reference block at `po` displaced by `cand_mv / 8`
/// pixels is compared against `org_region`.
#[expect(clippy::too_many_arguments)]
fn get_fullpel_mv_rd<T: Pixel>(
    po: PlaneOffset,
    org_region: &PlaneRegion<T>,
    p_ref: &Plane<T>,
    bit_depth: usize,
    pmv: [MotionVector; 2],
    lambda: u32,
    use_satd: bool,
    mvx_min: isize,
    mvx_max: isize,
    mvy_min: isize,
    mvy_max: isize,
    w: usize,
    h: usize,
    cand_mv: MotionVector,
) -> MVCandidateRD {
    let col_in_range = (mvx_min..=mvx_max).contains(&(cand_mv.col as isize));
    let row_in_range = (mvy_min..=mvy_max).contains(&(cand_mv.row as isize));
    if !(col_in_range && row_in_range) {
        return MVCandidateRD::empty();
    }

    // Motion vectors are in 1/8 pixel units; drop the fractional part.
    let ref_x = po.x + (cand_mv.col / 8) as isize;
    let ref_y = po.y + (cand_mv.row / 8) as isize;
    let displaced = p_ref.region(Area::StartingAt { x: ref_x, y: ref_y });

    compute_mv_rd(
        pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region, &displaced,
    )
}
/// Hexagon search around `current`, updating it in place.
///
/// All six hexagon vertices are scored first. While the best vertex beats
/// `current`, the search recentres on it and scores only three vertices of
/// the new hexagon. Once no vertex improves, the eight immediate neighbours
/// are checked as a final refinement.
///
/// # Panics
///
/// Panics if `current` is still empty on exit.
#[expect(clippy::too_many_arguments)]
fn hexagon_search<T: Pixel>(
po: PlaneOffset,
org_region: &PlaneRegion<T>,
p_ref: &Plane<T>,
current: &mut MotionSearchResult,
bit_depth: usize,
pmv: [MotionVector; 2],
lambda: u32,
mvx_min: isize,
mvx_max: isize,
mvy_min: isize,
mvy_max: isize,
w: usize,
h: usize,
) {
// Index into `HEXAGON_PATTERN` of the best vertex of the last round.
let mut best_cand_idx: usize = 0;
let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
// First round: score the full hexagon.
for (i, &pattern_mv) in HEXAGON_PATTERN.iter().enumerate() {
let cand_mv = current.mv + pattern_mv;
let rd = get_fullpel_mv_rd(
po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, mvx_max, mvy_min,
mvy_max, w, h, cand_mv,
);
if rd.cost < best_cand.rd.cost {
best_cand_idx = i;
best_cand.mv = cand_mv;
best_cand.rd = rd;
}
}
// Keep moving to the best vertex for as long as it improves the cost.
while best_cand.rd.cost < current.rd.cost {
*current = best_cand;
best_cand = MotionSearchResult::empty();
let center_cand_idx = best_cand_idx;
// Offsets 5, 6 and 7 modulo 6 select the previous, same and next vertex
// relative to the direction just taken.
for idx_offset_mod6 in 5..=7 {
let i = (center_cand_idx + idx_offset_mod6) % 6;
let cand_mv = current.mv + HEXAGON_PATTERN[i];
let rd = get_fullpel_mv_rd(
po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, mvx_max, mvy_min,
mvy_max, w, h, cand_mv,
);
if rd.cost < best_cand.rd.cost {
best_cand_idx = i;
best_cand.mv = cand_mv;
best_cand.rd = rd;
}
}
}
// Final refinement over the eight surrounding positions.
let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
for &offset in &SQUARE_REFINE_PATTERN {
let cand_mv = current.mv + offset;
let rd = get_fullpel_mv_rd(
po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, mvx_max, mvy_min,
mvy_max, w, h, cand_mv,
);
if rd.cost < best_cand.rd.cost {
best_cand.mv = cand_mv;
best_cand.rd = rd;
}
}
if best_cand.rd.cost < current.rd.cost {
*current = best_cand;
}
assert!(!current.is_empty());
}
#[expect(clippy::too_many_arguments)]
fn fullpel_diamond_search<T: Pixel>(
po: PlaneOffset,
org_region: &PlaneRegion<T>,
p_ref: &Plane<T>,
current: &mut MotionSearchResult,
bit_depth: usize,
pmv: [MotionVector; 2],
lambda: u32,
mvx_min: isize,
mvx_max: isize,
mvy_min: isize,
mvy_max: isize,
w: usize,
h: usize,
) {
let (mut diamond_radius_log2, diamond_radius_end_log2) = (1u8, 0u8);
loop {
let mut best_cand: MotionSearchResult = MotionSearchResult::empty();
for &offset in &DIAMOND_R1_PATTERN {
let cand_mv = current.mv + (offset << diamond_radius_log2);
let rd = get_fullpel_mv_rd(
po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, mvx_max, mvy_min,
mvy_max, w, h, cand_mv,
);
if rd.cost < best_cand.rd.cost {
best_cand.mv = cand_mv;
best_cand.rd = rd;
}
}
if current.rd.cost <= best_cand.rd.cost {
if diamond_radius_log2 == diamond_radius_end_log2 {
break;
} else {
diamond_radius_log2 -= 1;
}
} else {
*current = best_cand;
}
}
assert!(!current.is_empty());
}