use crate::{GpuDevice, GpuError, Result};
use rayon::prelude::*;
/// Video codec that a motion-estimation configuration targets.
///
/// The value is stored in [`MotionEstimationConfig::codec`]; the presets
/// `MotionEstimator::av1_default` and `MotionEstimator::vp9_default` select
/// the matching variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TargetCodec {
/// AV1 target.
Av1,
/// VP9 target.
Vp9,
}
/// How a frame is split into blocks for motion search.
///
/// The `FixedNxN` variants request square blocks with an edge of `N` pixels.
/// The default is [`BlockPartition::Fixed16x16`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum BlockPartition {
    /// Square blocks of 16x16 pixels (the default).
    #[default]
    Fixed16x16,
    /// Square blocks of 32x32 pixels.
    Fixed32x32,
    /// Square blocks of 64x64 pixels.
    Fixed64x64,
    /// Square blocks of 128x128 pixels.
    Fixed128x128,
    /// Adaptive partitioning. The CPU estimator in this file currently treats
    /// this the same as [`BlockPartition::Fixed16x16`].
    Adaptive,
}
/// Tunable parameters for a [`MotionEstimator`].
#[derive(Debug, Clone)]
pub struct MotionEstimationConfig {
/// Codec the estimation is intended for.
pub codec: TargetCodec,
/// Block partitioning scheme; determines the block edge length used by the
/// CPU search.
pub partition: BlockPartition,
/// Maximum displacement, in whole pixels, searched in each direction on
/// both axes (the search window is `-search_radius..=search_radius`).
pub search_radius: u32,
/// When `true`, each block result carries a `SubpixelMv` in addition to
/// the integer vector.
pub subpixel_refinement: bool,
/// Requested matching cost metric.
// NOTE(review): the CPU path visible in this file always computes SAD and
// does not read this field — confirm whether that is intended.
pub metric: MotionMetric,
/// Number of pyramid levels requested.
// NOTE(review): not read by any code visible in this file — presumably
// reserved for a hierarchical search; confirm.
pub pyramid_levels: u32,
}
impl Default for MotionEstimationConfig {
fn default() -> Self {
Self {
codec: TargetCodec::Av1,
partition: BlockPartition::default(),
search_radius: 32,
subpixel_refinement: true,
metric: MotionMetric::Sad,
pyramid_levels: 3,
}
}
}
/// Cost metric requested for block matching.
// NOTE(review): the CPU matcher visible in this file only implements SAD
// (`compute_sad`) and does not branch on this value — confirm whether the
// other variants are handled elsewhere.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MotionMetric {
/// Sum of absolute differences.
Sad,
/// Presumably sum of squared differences — not implemented in the visible code.
Ssd,
/// Presumably a Hadamard-transform based cost — not implemented in the visible code.
Hadamard,
}
/// Integer-pixel motion vector.
///
/// In the CPU matcher the vector is the offset from a block's position in the
/// current frame to the position of its best match in the reference frame
/// (`reference position = block position + (dx, dy)`). The default is the
/// zero vector.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct MotionVector {
/// Horizontal displacement in whole pixels.
pub dx: i16,
/// Vertical displacement in whole pixels.
pub dy: i16,
}
/// Motion vector expressed on a finer-than-pixel grid.
///
/// The CPU matcher fills this with the integer vector multiplied by 4
/// (presumably quarter-pixel units — confirm against consumers); it does not
/// perform a fractional search.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct SubpixelMv {
/// Horizontal displacement in sub-pixel units.
pub dx: i32,
/// Vertical displacement in sub-pixel units.
pub dy: i32,
}
/// Motion-search outcome for a single block.
#[derive(Debug, Clone)]
pub struct BlockMvResult {
/// X coordinate, in pixels, of the block's top-left corner.
pub block_x: u32,
/// Y coordinate, in pixels, of the block's top-left corner.
pub block_y: u32,
/// Best integer-pixel motion vector found for the block.
pub mv: MotionVector,
/// Sub-pixel vector; `Some` only when `subpixel_refinement` is enabled in
/// the configuration.
pub subpixel_mv: Option<SubpixelMv>,
/// Matching cost of `mv` (lower is better); the CPU path stores the SAD here.
pub cost: u32,
}
/// Motion-search outcome for a whole frame.
#[derive(Debug, Clone)]
pub struct FrameMvResult {
/// Frame width in pixels, as passed to the estimator.
pub width: u32,
/// Frame height in pixels, as passed to the estimator.
pub height: u32,
/// One entry per block. The CPU path generates them in row-major block
/// order (left to right, then top to bottom).
pub block_mvs: Vec<BlockMvResult>,
/// Edge length, in pixels, of the square blocks used for the search.
pub block_size: u32,
/// Whether the result came from the GPU path; the CPU path sets `false`.
pub used_gpu: bool,
}
impl FrameMvResult {
    /// Number of block columns needed to cover `width`, rounding up so that a
    /// partial block at the right edge is counted.
    ///
    /// Returns 0 when `block_size` is 0 instead of panicking on a division by
    /// zero (the fields are public, so such a value can be constructed).
    #[must_use]
    pub fn blocks_x(&self) -> u32 {
        Self::blocks_along(self.width, self.block_size)
    }

    /// Number of block rows needed to cover `height`, rounding up so that a
    /// partial block at the bottom edge is counted.
    ///
    /// Returns 0 when `block_size` is 0.
    #[must_use]
    pub fn blocks_y(&self) -> u32 {
        Self::blocks_along(self.height, self.block_size)
    }

    /// Mean Euclidean length of the integer motion vectors of all blocks.
    ///
    /// Returns `0.0` when there are no blocks. The sum is accumulated in `f64`
    /// and narrowed to `f32` only at the end.
    #[must_use]
    pub fn mean_mv_magnitude(&self) -> f32 {
        if self.block_mvs.is_empty() {
            return 0.0;
        }
        let total: f64 = self
            .block_mvs
            .iter()
            .map(|block| {
                let dx = f64::from(block.mv.dx);
                let dy = f64::from(block.mv.dy);
                (dx * dx + dy * dy).sqrt()
            })
            .sum();
        (total / self.block_mvs.len() as f64) as f32
    }

    /// Ceiling division of `extent` by `block_size`, with 0 for a zero block size.
    fn blocks_along(extent: u32, block_size: u32) -> u32 {
        if block_size == 0 {
            0
        } else {
            extent.div_ceil(block_size)
        }
    }
}
/// Block-matching motion estimator.
///
/// Holds the configuration that its estimation methods read on every call.
pub struct MotionEstimator {
// Parameters fixed at construction time.
config: MotionEstimationConfig,
}
impl MotionEstimator {
    /// Creates an estimator that uses `config` for every subsequent call.
    #[must_use]
    pub fn new(config: MotionEstimationConfig) -> Self {
        Self { config }
    }

    /// Preset for AV1: 64x64 blocks, search radius 48, sub-pixel output
    /// enabled, SAD metric, three pyramid levels.
    #[must_use]
    pub fn av1_default() -> Self {
        Self::new(MotionEstimationConfig {
            codec: TargetCodec::Av1,
            partition: BlockPartition::Fixed64x64,
            search_radius: 48,
            subpixel_refinement: true,
            metric: MotionMetric::Sad,
            pyramid_levels: 3,
        })
    }

    /// Preset for VP9: 64x64 blocks, search radius 32, sub-pixel output
    /// enabled, SAD metric, two pyramid levels.
    #[must_use]
    pub fn vp9_default() -> Self {
        Self::new(MotionEstimationConfig {
            codec: TargetCodec::Vp9,
            partition: BlockPartition::Fixed64x64,
            search_radius: 32,
            subpixel_refinement: true,
            metric: MotionMetric::Sad,
            pyramid_levels: 2,
        })
    }

    /// Estimates per-block motion between `reference` and `current`.
    ///
    /// Both buffers are read as single-plane 8-bit frames of `width * height`
    /// samples with a row pitch of `width`. The GPU path is attempted unless
    /// `device.is_fallback` is set; if it fails for any reason the CPU path is
    /// used instead.
    ///
    /// # Errors
    ///
    /// * [`GpuError::InvalidDimensions`] if `width` or `height` is zero, or if
    ///   `width * height` does not fit in `usize`.
    /// * [`GpuError::InvalidBufferSize`] if either buffer holds fewer than
    ///   `width * height` bytes.
    pub fn estimate(
        &self,
        device: &GpuDevice,
        reference: &[u8],
        current: &[u8],
        width: u32,
        height: u32,
    ) -> Result<FrameMvResult> {
        // Validate before touching either backend. The size is computed with
        // checked `usize` arithmetic so huge dimensions cannot overflow.
        Self::validate_frames(reference, current, width, height)?;
        if !device.is_fallback {
            // Best-effort: a GPU failure is deliberately not surfaced, the CPU
            // implementation below produces the result instead.
            if let Ok(result) = self.estimate_gpu(device, reference, current, width, height) {
                return Ok(result);
            }
        }
        self.estimate_cpu(reference, current, width, height)
    }

    /// GPU backend placeholder; always fails with [`GpuError::NotSupported`].
    fn estimate_gpu(
        &self,
        _device: &GpuDevice,
        reference: &[u8],
        current: &[u8],
        width: u32,
        height: u32,
    ) -> Result<FrameMvResult> {
        let _ = (reference, current, width, height);
        Err(GpuError::NotSupported(
            "GPU motion estimation shaders are not yet compiled".to_string(),
        ))
    }

    /// Full-search block matching on the CPU, one rayon task per block.
    ///
    /// Blocks are produced in row-major order. The cost is always SAD; the
    /// `metric`, `codec` and `pyramid_levels` settings are not consulted here.
    ///
    /// # Errors
    ///
    /// Same conditions as [`MotionEstimator::estimate`].
    fn estimate_cpu(
        &self,
        reference: &[u8],
        current: &[u8],
        width: u32,
        height: u32,
    ) -> Result<FrameMvResult> {
        Self::validate_frames(reference, current, width, height)?;
        let block_size = self.block_size();
        let blocks_x = width.div_ceil(block_size) as usize;
        let blocks_y = height.div_ceil(block_size) as usize;
        // Cannot overflow: blocks_x * blocks_y <= width * height, and that
        // product was just proven to fit in `usize`.
        let n_blocks = blocks_x * blocks_y;
        let block_mvs: Vec<BlockMvResult> = (0..n_blocks)
            .into_par_iter()
            .map(|idx| {
                // Column/row indices are below the u32 block counts, and the
                // resulting pixel origins are below `width` / `height`.
                let bx = (idx % blocks_x) as u32 * block_size;
                let by = (idx / blocks_x) as u32 * block_size;
                self.match_block(reference, current, width, height, bx, by, block_size)
            })
            .collect();
        Ok(FrameMvResult {
            width,
            height,
            block_mvs,
            block_size,
            used_gpu: false,
        })
    }

    /// Checks that the dimensions are non-zero and representable, and that
    /// both buffers hold at least `width * height` bytes.
    fn validate_frames(reference: &[u8], current: &[u8], width: u32, height: u32) -> Result<()> {
        if width == 0 || height == 0 {
            return Err(GpuError::InvalidDimensions { width, height });
        }
        let required = (width as usize)
            .checked_mul(height as usize)
            .ok_or(GpuError::InvalidDimensions { width, height })?;
        for buffer in [reference, current] {
            if buffer.len() < required {
                return Err(GpuError::InvalidBufferSize {
                    expected: required,
                    actual: buffer.len(),
                });
            }
        }
        Ok(())
    }

    /// Block edge length in pixels for the configured partition.
    ///
    /// `Adaptive` is not implemented separately and uses 16.
    fn block_size(&self) -> u32 {
        match self.config.partition {
            BlockPartition::Fixed16x16 | BlockPartition::Adaptive => 16,
            BlockPartition::Fixed32x32 => 32,
            BlockPartition::Fixed64x64 => 64,
            BlockPartition::Fixed128x128 => 128,
        }
    }

    /// Exhaustively searches the window around the block at (`bx`, `by`) and
    /// returns the displacement with the lowest SAD.
    ///
    /// * Blocks that overhang the right or bottom frame edge are cropped to
    ///   the frame, and candidates only need to keep that cropped footprint
    ///   inside the reference frame.
    /// * The zero vector is evaluated first and wins ties; among other
    ///   candidates the first one in scan order (rows of `dy`, then `dx`,
    ///   both ascending) wins ties.
    /// * The search radius is capped at `i16::MAX` so every displacement fits
    ///   the `MotionVector` fields.
    // The argument list mirrors the frame/block geometry; bundling it would
    // only add a throwaway struct.
    #[allow(clippy::too_many_arguments)]
    fn match_block(
        &self,
        reference: &[u8],
        current: &[u8],
        width: u32,
        height: u32,
        bx: u32,
        by: u32,
        block_size: u32,
    ) -> BlockMvResult {
        const MAX_RADIUS: u32 = i16::MAX as u32;

        let stride = width as usize;
        let frame_h = height as usize;
        let (cur_x, cur_y) = (bx as usize, by as usize);
        let bs = block_size as usize;

        // Footprint of this block after cropping to the frame.
        let block_w = block_size.min(width.saturating_sub(bx));
        let block_h = block_size.min(height.saturating_sub(by));

        // Displacement bounds that keep the cropped footprint inside the
        // reference frame; i64 avoids any signed/unsigned wrap-around.
        let radius = i64::from(self.config.search_radius.min(MAX_RADIUS));
        let (origin_x, origin_y) = (i64::from(bx), i64::from(by));
        let dx_min = (-radius).max(-origin_x);
        let dx_max = radius.min(i64::from(width) - i64::from(block_w) - origin_x);
        let dy_min = (-radius).max(-origin_y);
        let dy_max = radius.min(i64::from(height) - i64::from(block_h) - origin_y);

        let sad_at = |ref_x: usize, ref_y: usize| {
            self.compute_sad(
                reference, current, stride, stride, frame_h, ref_x, ref_y, cur_x, cur_y, bs,
            )
        };

        let mut best_cost = sad_at(cur_x, cur_y);
        let mut best_mv = MotionVector::default();
        'search: for dy in dy_min..=dy_max {
            for dx in dx_min..=dx_max {
                if best_cost == 0 {
                    // A perfect match cannot be beaten, and ties keep the
                    // earlier vector, so the result is already final.
                    break 'search;
                }
                if dx == 0 && dy == 0 {
                    continue;
                }
                // Non-negative thanks to the lower bounds computed above.
                let cost = sad_at((origin_x + dx) as usize, (origin_y + dy) as usize);
                if cost < best_cost {
                    best_cost = cost;
                    // Lossless: |dx|, |dy| <= MAX_RADIUS == i16::MAX.
                    best_mv = MotionVector {
                        dx: dx as i16,
                        dy: dy as i16,
                    };
                }
            }
        }

        // No fractional search is performed: the integer vector is simply
        // rescaled by 4.
        let subpixel_mv = self.config.subpixel_refinement.then(|| SubpixelMv {
            dx: i32::from(best_mv.dx) * 4,
            dy: i32::from(best_mv.dy) * 4,
        });

        BlockMvResult {
            block_x: bx,
            block_y: by,
            mv: best_mv,
            subpixel_mv,
            cost: best_cost,
        }
    }

    /// Sum of absolute differences between the block at (`cur_x`, `cur_y`) in
    /// `current` and the block at (`ref_x`, `ref_y`) in `reference`.
    ///
    /// `stride` is the row pitch of both buffers; `width` and `height` bound
    /// the visible frame. The compared area is cropped so that neither block
    /// extends past column `width` or row `height`; this keeps an edge block
    /// from wrapping into the following row. Samples beyond the end of either
    /// buffer are ignored.
    // The argument list mirrors the frame/block geometry; see `match_block`.
    #[allow(clippy::too_many_arguments)]
    fn compute_sad(
        &self,
        reference: &[u8],
        current: &[u8],
        stride: usize,
        width: usize,
        height: usize,
        ref_x: usize,
        ref_y: usize,
        cur_x: usize,
        cur_y: usize,
        block_size: usize,
    ) -> u32 {
        let cols = block_size
            .min(width.saturating_sub(cur_x))
            .min(width.saturating_sub(ref_x));
        let rows = block_size
            .min(height.saturating_sub(cur_y))
            .min(height.saturating_sub(ref_y));
        (0..rows)
            .map(|row| {
                let cur_start = (cur_y + row) * stride + cur_x;
                let ref_start = (ref_y + row) * stride + ref_x;
                let cur_row = current.get(cur_start..).unwrap_or_default();
                let ref_row = reference.get(ref_start..).unwrap_or_default();
                cur_row
                    .iter()
                    .zip(ref_row)
                    .take(cols)
                    .map(|(&c, &r)| u32::from(c.abs_diff(r)))
                    .sum::<u32>()
            })
            .sum()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `w` x `h` frame in which every sample equals `value`.
    fn gray_frame(w: u32, h: u32, value: u8) -> Vec<u8> {
        vec![value; (w * h) as usize]
    }

    /// Builds a deterministic pseudo-random `w` x `h` frame and returns it
    /// translated by (`dx`, `dy`); samples with no source pixel are 128.
    fn shifted_frame(w: u32, h: u32, dx: i32, dy: i32) -> Vec<u8> {
        let mut state: u64 = 0x5851_F42D_4C95_7F2D;
        let mut frame = vec![0u8; (w * h) as usize];
        for pixel in frame.iter_mut() {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            *pixel = ((state >> 33) & 0xFF) as u8;
        }
        let mut shifted = vec![128u8; (w * h) as usize];
        for y in 0..h as i32 {
            for x in 0..w as i32 {
                let sx = x + dx;
                let sy = y + dy;
                if sx >= 0 && sy >= 0 && sx < w as i32 && sy < h as i32 {
                    shifted[(sy as usize) * w as usize + sx as usize] =
                        frame[y as usize * w as usize + x as usize];
                }
            }
        }
        shifted
    }

    #[test]
    fn test_estimator_default_config() {
        let e = MotionEstimator::av1_default();
        assert_eq!(e.config.codec, TargetCodec::Av1);
    }

    #[test]
    fn test_vp9_default_config() {
        let e = MotionEstimator::vp9_default();
        assert_eq!(e.config.codec, TargetCodec::Vp9);
    }

    #[test]
    fn test_zero_mv_for_identical_frames() {
        let w = 64u32;
        let h = 64u32;
        let frame = gray_frame(w, h, 128);
        let e = MotionEstimator::new(MotionEstimationConfig {
            partition: BlockPartition::Fixed16x16,
            search_radius: 4,
            subpixel_refinement: false,
            ..MotionEstimationConfig::default()
        });
        let result = e
            .estimate_cpu(&frame, &frame, w, h)
            .expect("CPU estimate failed");
        for bm in &result.block_mvs {
            assert_eq!(bm.mv.dx, 0, "dx should be 0 for identical frames");
            assert_eq!(bm.mv.dy, 0, "dy should be 0 for identical frames");
        }
    }

    #[test]
    fn test_mv_detected_for_shifted_frame() {
        let w = 64u32;
        let h = 64u32;
        let reference = shifted_frame(w, h, 0, 0);
        let current = shifted_frame(w, h, 4, 0);
        let e = MotionEstimator::new(MotionEstimationConfig {
            partition: BlockPartition::Fixed16x16,
            search_radius: 8,
            subpixel_refinement: false,
            ..MotionEstimationConfig::default()
        });
        // Fixed: the second argument was the mangled token `¤t`.
        let result = e
            .estimate_cpu(&reference, &current, w, h)
            .expect("CPU estimate failed");
        let matched = result
            .block_mvs
            .iter()
            .filter(|b| b.mv.dx.abs() >= 3)
            .count();
        assert!(
            matched > result.block_mvs.len() / 2,
            "expected most blocks to detect horizontal shift"
        );
    }

    #[test]
    fn test_invalid_dimensions_rejected() {
        let e = MotionEstimator::av1_default();
        let frame = vec![0u8; 64];
        let result = e.estimate_cpu(&frame, &frame, 0, 8);
        assert!(result.is_err());
    }

    #[test]
    fn test_buffer_too_small_rejected() {
        let e = MotionEstimator::av1_default();
        let small = vec![0u8; 4];
        let frame = vec![0u8; 64 * 64];
        let result = e.estimate_cpu(&small, &frame, 64, 64);
        assert!(result.is_err(), "undersized reference should be rejected");
    }

    #[test]
    fn test_mean_mv_magnitude_zero_for_static() {
        let w = 32u32;
        let h = 32u32;
        let frame = gray_frame(w, h, 100);
        let e = MotionEstimator::new(MotionEstimationConfig {
            partition: BlockPartition::Fixed16x16,
            search_radius: 2,
            subpixel_refinement: false,
            ..MotionEstimationConfig::default()
        });
        let result = e
            .estimate_cpu(&frame, &frame, w, h)
            .expect("CPU estimate failed");
        assert_eq!(result.mean_mv_magnitude(), 0.0);
    }

    #[test]
    fn test_blocks_dimensions() {
        let w = 64u32;
        let h = 32u32;
        let frame = gray_frame(w, h, 0);
        let e = MotionEstimator::new(MotionEstimationConfig {
            partition: BlockPartition::Fixed16x16,
            search_radius: 2,
            subpixel_refinement: false,
            ..MotionEstimationConfig::default()
        });
        let result = e
            .estimate_cpu(&frame, &frame, w, h)
            .expect("CPU estimate failed");
        assert_eq!(result.blocks_x(), 4);
        assert_eq!(result.blocks_y(), 2);
        assert_eq!(result.block_mvs.len(), 8);
    }

    #[test]
    fn test_subpixel_refinement_present() {
        let w = 16u32;
        let h = 16u32;
        let frame = gray_frame(w, h, 128);
        let e = MotionEstimator::new(MotionEstimationConfig {
            partition: BlockPartition::Fixed16x16,
            search_radius: 2,
            subpixel_refinement: true,
            ..MotionEstimationConfig::default()
        });
        let result = e
            .estimate_cpu(&frame, &frame, w, h)
            .expect("CPU estimate failed");
        for bm in &result.block_mvs {
            assert!(
                bm.subpixel_mv.is_some(),
                "subpixel_mv should be present when refinement is enabled"
            );
        }
    }

    #[test]
    fn test_subpixel_refinement_absent_when_disabled() {
        let w = 16u32;
        let h = 16u32;
        let frame = gray_frame(w, h, 64);
        let e = MotionEstimator::new(MotionEstimationConfig {
            partition: BlockPartition::Fixed16x16,
            search_radius: 2,
            subpixel_refinement: false,
            ..MotionEstimationConfig::default()
        });
        let result = e
            .estimate_cpu(&frame, &frame, w, h)
            .expect("CPU estimate failed");
        for bm in &result.block_mvs {
            assert!(
                bm.subpixel_mv.is_none(),
                "subpixel_mv should be absent when refinement is disabled"
            );
        }
    }
}