#[cfg(test)]
mod tests;
use std::{
cmp::{max, min},
num::NonZeroUsize,
};
use anyhow::{Result, anyhow, bail, ensure};
use semisafe::slice::get as semisafe_get;
use semisafe::slice::get_mut as semisafe_get_mut;
use crate::{
analysis::MVAnalysisData,
fake::group_of_planes::FakeGroupOfPlanes,
frame::{FramePlanesMut, FrameView, PlaneSizeTuple},
params::Subpel,
resize::SimpleResize,
util::{Pixel, blend_plane, check_and_pad_mask_small, make_vector_occlusion_mask_time},
video::{ColorFamily, SampleType, VideoInfo},
};
/// Caller-supplied settings consumed by [`FlowInter::new`].
#[derive(Debug, Clone, Copy)]
pub struct FlowInterOptions {
/// Time position of the interpolated frame in percent; `FlowInter::new` rejects values
/// outside `0.0..=100.0`.
pub time: f64,
/// Value forwarded to `make_vector_occlusion_mask_time` when the occlusion masks are built;
/// `FlowInter::new` requires it to be greater than 0.
pub ml: f64,
}
/// Flow interpolation filter: shared interpolation state plus a fixed time position.
pub struct FlowInter {
/// Shared state that performs the actual rendering and blending.
base: FlowInterpolationBase,
/// `FlowInterOptions::time` rescaled from percent to a 0..=256 weight (cast to `i32`).
time256: i32,
}
/// Optional second pair of vector fields passed to `render_frame`.
///
/// When present, the non-simple modes render with `flow_inter_extra` instead of `flow_inter`.
pub struct FlowInterExtraVectors<'a> {
/// Additional vectors processed with the backward analysis metadata.
pub backward: &'a FakeGroupOfPlanes,
/// Additional vectors processed with the forward analysis metadata.
pub forward: &'a FakeGroupOfPlanes,
}
/// Validated metadata and precomputed geometry shared by the flow interpolation filters.
///
/// All chroma-related `Option` fields are filled together by `build_flow_vector_geometry`:
/// they are `Some` for YUV input and `None` otherwise.
pub(crate) struct FlowInterpolationBase {
/// Analysis metadata of the backward vectors (`is_backward` is checked in `new`).
backward_data: MVAnalysisData,
/// Analysis metadata of the forward vectors (`!is_backward` is checked in `new`).
forward_data: MVAnalysisData,
/// Value forwarded to `make_vector_occlusion_mask_time`.
ml: f64,
/// Number of block columns after padding so the blocks span the full frame width.
blk_x_padded: NonZeroUsize,
/// Number of block rows after padding so the blocks span the full frame height.
blk_y_padded: NonZeroUsize,
/// Luma height covered by `blk_y_padded` block rows.
height_padded: NonZeroUsize,
/// `height_padded` divided by the vertical chroma ratio.
height_padded_uv: Option<NonZeroUsize>,
/// Frame width divided by the horizontal chroma ratio.
width_uv: Option<NonZeroUsize>,
/// Frame height divided by the vertical chroma ratio.
height_uv: Option<NonZeroUsize>,
/// Backward `h_padding` divided by the horizontal chroma ratio.
h_padding_uv: usize,
/// Backward `v_padding` divided by the vertical chroma ratio.
v_padding_uv: usize,
/// Row pitch of the full-resolution luma vector/mask buffers (padded width rounded up to a
/// multiple of 16).
vector_pitch_y: NonZeroUsize,
/// Row pitch of the full-resolution chroma vector/mask buffers (same rounding).
vector_pitch_uv: Option<NonZeroUsize>,
/// Resizer that expands block-resolution vectors and masks to luma resolution.
upsizer: SimpleResize,
/// Resizer that expands block-resolution vectors and masks to chroma resolution.
upsizer_uv: Option<SimpleResize>,
}
/// Selects the per-pixel kernel used by `render_interpolation_plane`.
#[derive(Debug, Clone, Copy)]
pub(crate) enum FlowInterpolationMode {
/// Always renders with `flow_inter_simple`.
Simple,
/// Renders with `flow_inter`, or with `flow_inter_extra` when extra vectors are supplied.
Regular,
/// Handled exactly like `Regular` by `render_interpolation_plane`.
Extra,
}
/// Horizontal and vertical vector components after upsizing with `SimpleResize`.
#[derive(Debug)]
pub(crate) struct ResizedVectorPair {
/// Upsized horizontal vector components, indexed as `y * vector_pitch + x` by the kernels.
pub(crate) vx_full: Vec<i16>,
/// Upsized vertical vector components, same layout as `vx_full`.
pub(crate) vy_full: Vec<i16>,
}
/// Upsized vectors together with the matching upsized occlusion mask for one direction.
pub(crate) struct InterpolationField {
/// Upsized vector components.
pub(crate) vectors: ResizedVectorPair,
/// Upsized occlusion mask, indexed like the vector buffers.
pub(crate) mask_full: Vec<u8>,
}
impl FlowInter {
#[inline]
pub fn new(
info: VideoInfo,
backward_data: MVAnalysisData,
forward_data: MVAnalysisData,
options: FlowInterOptions,
) -> Result<Self> {
ensure!(
(0.0..=100.0).contains(&options.time),
"FlowInter: time must be between 0 and 100 % (inclusive)."
);
ensure!(options.ml > 0.0, "FlowInter: ml must be greater than 0.");
Ok(Self {
base: FlowInterpolationBase::new(
info,
backward_data,
forward_data,
options.ml,
"FlowInter",
)?,
time256: (options.time * 256.0 / 100.0) as i32,
})
}
#[inline]
pub fn render_frame<T: Pixel>(
&self,
backward: &FrameView<'_, T>,
forward: &FrameView<'_, T>,
output: &mut FramePlanesMut<'_, T>,
output_pitch: PlaneSizeTuple,
backward_vectors: &FakeGroupOfPlanes,
forward_vectors: &FakeGroupOfPlanes,
extra_vectors: Option<FlowInterExtraVectors<'_>>,
) -> Result<()> {
self.base.render_frame(
FlowInterpolationMode::Regular,
self.time256,
backward,
forward,
output,
output_pitch,
backward_vectors,
forward_vectors,
extra_vectors,
)
}
#[inline]
pub fn blend_frame<T: Pixel>(
&self,
current: &FrameView<'_, T>,
future: &FrameView<'_, T>,
output: &mut FramePlanesMut<'_, T>,
output_pitch: PlaneSizeTuple,
) -> Result<()> {
self.base
.blend_frame(self.time256, current, future, output, output_pitch)
}
}
impl FlowInterpolationBase {
#[inline]
pub(crate) fn new(
info: VideoInfo,
backward_data: MVAnalysisData,
forward_data: MVAnalysisData,
ml: f64,
filter_name: &str,
) -> Result<Self> {
let format = info.format;
if format.bits_per_sample.get() > 16 {
bail!("{filter_name}: input clip must be 8-16 bits");
}
if format.sample_type != SampleType::Integer {
bail!("{filter_name}: input clip must be integer super_format");
}
if ![ColorFamily::Yuv, ColorFamily::Gray].contains(&format.color_family)
|| format.sub_sampling_w > 1
|| format.sub_sampling_h > 1
{
bail!("{filter_name}: input clip must be GRAY, 420, 422, 440, or 444");
}
backward_data.check_similarity(&forward_data, filter_name, "mvbw", "mvfw")?;
ensure!(
backward_data.delta_frame > 0 && forward_data.delta_frame > 0,
"{filter_name}: cannot use motion vectors with absolute frame references."
);
ensure!(
backward_data.delta_frame == forward_data.delta_frame,
"{filter_name}: mvbw and mvfw must be generated with the same delta."
);
ensure!(
backward_data.is_backward,
"{filter_name}: mvbw must be generated with isb=True."
);
ensure!(
!forward_data.is_backward,
"{filter_name}: mvfw must be generated with isb=False."
);
ensure!(
info.resolution.width == backward_data.width.get()
&& info.resolution.height == backward_data.height.get(),
"{filter_name}: wrong source or super clip frame size."
);
let expected_x_ratio = 1usize << usize::from(format.sub_sampling_w);
let expected_y_ratio = 1usize << usize::from(format.sub_sampling_h);
ensure!(
backward_data.x_ratio_uv.get() as usize == expected_x_ratio
&& backward_data.y_ratio_uv.get() as usize == expected_y_ratio,
"{filter_name}: input clip subsampling does not match vector metadata."
);
let geometry = build_flow_vector_geometry(
&backward_data,
format.color_family == ColorFamily::Yuv,
filter_name,
)?;
let x_ratio_uv = NonZeroUsize::from(backward_data.x_ratio_uv);
let y_ratio_uv = NonZeroUsize::from(backward_data.y_ratio_uv);
Ok(Self {
backward_data,
forward_data,
ml,
blk_x_padded: geometry.blk_x_padded,
blk_y_padded: geometry.blk_y_padded,
height_padded: geometry.height_padded,
height_padded_uv: geometry.height_padded_uv,
width_uv: geometry.width_uv,
height_uv: geometry.height_uv,
h_padding_uv: backward_data.h_padding / x_ratio_uv.get(),
v_padding_uv: backward_data.v_padding / y_ratio_uv.get(),
vector_pitch_y: geometry.vector_pitch,
vector_pitch_uv: geometry.vector_pitch_uv,
upsizer: geometry.upsizer,
upsizer_uv: geometry.upsizer_uv,
})
}
#[inline]
pub(crate) fn render_frame<T: Pixel>(
&self,
mode: FlowInterpolationMode,
time256: i32,
backward: &FrameView<'_, T>,
forward: &FrameView<'_, T>,
output: &mut FramePlanesMut<'_, T>,
output_pitch: PlaneSizeTuple,
backward_vectors: &FakeGroupOfPlanes,
forward_vectors: &FakeGroupOfPlanes,
extra_vectors: Option<FlowInterExtraVectors<'_>>,
) -> Result<()> {
let (mut vx_small_y_b, mut vy_small_y_b) = make_vector_small_masks(
backward_vectors,
self.backward_data.blk_x.get(),
self.backward_data.blk_y.get(),
self.blk_x_padded.get(),
self.blk_y_padded.get(),
);
let (mut vx_small_y_f, mut vy_small_y_f) = make_vector_small_masks(
forward_vectors,
self.forward_data.blk_x.get(),
self.forward_data.blk_y.get(),
self.blk_x_padded.get(),
self.blk_y_padded.get(),
);
check_and_pad_small_vectors(
&mut vx_small_y_b,
&mut vy_small_y_b,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
self.backward_data.blk_x.get(),
self.backward_data.blk_y.get(),
);
check_and_pad_small_vectors(
&mut vx_small_y_f,
&mut vy_small_y_f,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
self.forward_data.blk_x.get(),
self.forward_data.blk_y.get(),
);
let mut mask_small_b = make_vector_occlusion_mask_time(
backward_vectors,
true,
self.backward_data.blk_x.get(),
self.backward_data.blk_y.get(),
self.ml,
1.0,
self.backward_data.pel,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
256 - time256,
self.backward_data.blk_size_x.get() - self.backward_data.overlap_x,
self.backward_data.blk_size_y.get() - self.backward_data.overlap_y,
);
let mut mask_small_f = make_vector_occlusion_mask_time(
forward_vectors,
false,
self.forward_data.blk_x.get(),
self.forward_data.blk_y.get(),
self.ml,
1.0,
self.forward_data.pel,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
time256,
self.forward_data.blk_size_x.get() - self.forward_data.overlap_x,
self.forward_data.blk_size_y.get() - self.forward_data.overlap_y,
);
check_and_pad_mask_small(
&mut mask_small_b,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
self.backward_data.blk_x.get(),
self.backward_data.blk_y.get(),
);
check_and_pad_mask_small(
&mut mask_small_f,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
self.forward_data.blk_x.get(),
self.forward_data.blk_y.get(),
);
let field_y_b = resize_interpolation_field(
&self.upsizer,
self.blk_x_padded,
self.vector_pitch_y,
self.height_padded,
&vx_small_y_b,
&vy_small_y_b,
&mask_small_b,
);
let field_y_f = resize_interpolation_field(
&self.upsizer,
self.blk_x_padded,
self.vector_pitch_y,
self.height_padded,
&vx_small_y_f,
&vy_small_y_f,
&mask_small_f,
);
#[allow(clippy::type_complexity)]
let extra_small_y = extra_vectors.map(|extra| {
let (mut vx_small_y_bb, mut vy_small_y_bb) = make_vector_small_masks(
extra.backward,
self.backward_data.blk_x.get(),
self.backward_data.blk_y.get(),
self.blk_x_padded.get(),
self.blk_y_padded.get(),
);
let (mut vx_small_y_ff, mut vy_small_y_ff) = make_vector_small_masks(
extra.forward,
self.forward_data.blk_x.get(),
self.forward_data.blk_y.get(),
self.blk_x_padded.get(),
self.blk_y_padded.get(),
);
check_and_pad_small_vectors(
&mut vx_small_y_bb,
&mut vy_small_y_bb,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
self.backward_data.blk_x.get(),
self.backward_data.blk_y.get(),
);
check_and_pad_small_vectors(
&mut vx_small_y_ff,
&mut vy_small_y_ff,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
self.forward_data.blk_x.get(),
self.forward_data.blk_y.get(),
);
(vx_small_y_bb, vy_small_y_bb, vx_small_y_ff, vy_small_y_ff)
});
let extra_vectors_y = extra_small_y.as_ref().map(|(vx_bb, vy_bb, vx_ff, vy_ff)| {
(
resize_vector_pair(
&self.upsizer,
self.blk_x_padded,
self.vector_pitch_y,
vx_bb,
vy_bb,
),
resize_vector_pair(
&self.upsizer,
self.blk_x_padded,
self.vector_pitch_y,
vx_ff,
vy_ff,
),
)
});
render_interpolation_plane(
mode,
extra_vectors_y.as_ref(),
output.plane_mut(0)?,
pitch_for_plane(output_pitch, 0)?.get(),
backward.plane(0)?,
backward.pitch_for_plane(0)?.get(),
reference_origin_offset(
backward.pitch_for_plane(0)?,
self.backward_data.h_padding,
self.backward_data.v_padding,
self.backward_data.pel,
),
forward.plane(0)?,
forward.pitch_for_plane(0)?.get(),
reference_origin_offset(
forward.pitch_for_plane(0)?,
self.forward_data.h_padding,
self.forward_data.v_padding,
self.forward_data.pel,
),
&field_y_b,
&field_y_f,
self.vector_pitch_y.get(),
self.backward_data.width.get(),
self.backward_data.height.get(),
time256,
self.backward_data.pel,
);
if let Some(upsizer_uv) = &self.upsizer_uv {
let vx_small_uv_b = vector_small_mask_y_to_uv(
&vx_small_y_b,
self.backward_data.x_ratio_uv.get() as usize,
);
let vy_small_uv_b = vector_small_mask_y_to_uv(
&vy_small_y_b,
self.backward_data.y_ratio_uv.get() as usize,
);
let vx_small_uv_f = vector_small_mask_y_to_uv(
&vx_small_y_f,
self.forward_data.x_ratio_uv.get() as usize,
);
let vy_small_uv_f = vector_small_mask_y_to_uv(
&vy_small_y_f,
self.forward_data.y_ratio_uv.get() as usize,
);
let field_uv_b = resize_interpolation_field(
upsizer_uv,
self.blk_x_padded,
self.vector_pitch_uv.expect("chroma pitch must exist"),
self.height_padded_uv.expect("chroma height must exist"),
&vx_small_uv_b,
&vy_small_uv_b,
&mask_small_b,
);
let field_uv_f = resize_interpolation_field(
upsizer_uv,
self.blk_x_padded,
self.vector_pitch_uv.expect("chroma pitch must exist"),
self.height_padded_uv.expect("chroma height must exist"),
&vx_small_uv_f,
&vy_small_uv_f,
&mask_small_f,
);
let extra_vectors_uv = extra_small_y.as_ref().map(|(vx_bb, vy_bb, vx_ff, vy_ff)| {
let vx_small_uv_bb =
vector_small_mask_y_to_uv(vx_bb, self.backward_data.x_ratio_uv.get() as usize);
let vy_small_uv_bb =
vector_small_mask_y_to_uv(vy_bb, self.backward_data.y_ratio_uv.get() as usize);
let vx_small_uv_ff =
vector_small_mask_y_to_uv(vx_ff, self.forward_data.x_ratio_uv.get() as usize);
let vy_small_uv_ff =
vector_small_mask_y_to_uv(vy_ff, self.forward_data.y_ratio_uv.get() as usize);
(
resize_vector_pair(
upsizer_uv,
self.blk_x_padded,
self.vector_pitch_uv.expect("chroma pitch must exist"),
&vx_small_uv_bb,
&vy_small_uv_bb,
),
resize_vector_pair(
upsizer_uv,
self.blk_x_padded,
self.vector_pitch_uv.expect("chroma pitch must exist"),
&vx_small_uv_ff,
&vy_small_uv_ff,
),
)
});
for plane in [1_usize, 2] {
render_interpolation_plane(
mode,
extra_vectors_uv.as_ref(),
output.plane_mut(plane)?,
pitch_for_plane(output_pitch, plane)?.get(),
backward.plane(plane)?,
backward.pitch_for_plane(plane)?.get(),
reference_origin_offset(
backward.pitch_for_plane(plane)?,
self.h_padding_uv,
self.v_padding_uv,
self.backward_data.pel,
),
forward.plane(plane)?,
forward.pitch_for_plane(plane)?.get(),
reference_origin_offset(
forward.pitch_for_plane(plane)?,
self.h_padding_uv,
self.v_padding_uv,
self.forward_data.pel,
),
&field_uv_b,
&field_uv_f,
self.vector_pitch_uv.expect("chroma pitch must exist").get(),
self.width_uv.expect("chroma width must exist").get(),
self.height_uv.expect("chroma height must exist").get(),
time256,
self.backward_data.pel,
);
}
}
Ok(())
}
/// Blends every plane of `current` and `future` into `output` with weight `time256`.
///
/// Plane dimensions come from the backward analysis metadata; no motion vectors are used.
///
/// # Errors
///
/// Propagates errors from plane and pitch lookups on `output`, `current` and `future`.
#[inline]
pub(crate) fn blend_frame<T: Pixel>(
    &self,
    time256: i32,
    current: &FrameView<'_, T>,
    future: &FrameView<'_, T>,
    output: &mut FramePlanesMut<'_, T>,
    output_pitch: PlaneSizeTuple,
) -> Result<()> {
    let data = &self.backward_data;
    for plane in 0..current.plane_count() {
        let dst = output.plane_mut(plane)?;
        let src_current = current.plane(plane)?;
        let src_future = future.plane(plane)?;
        let dst_pitch = pitch_for_plane(output_pitch, plane)?.get();
        let current_pitch = current.pitch_for_plane(plane)?.get();
        let future_pitch = future.pitch_for_plane(plane)?.get();
        blend_plane(
            dst,
            src_current,
            src_future,
            dst_pitch,
            current_pitch,
            future_pitch,
            plane_width(data, plane),
            plane_height(data, plane),
            time256,
        );
    }
    Ok(())
}
}
/// Geometry derived from one set of analysis metadata by `build_flow_vector_geometry`.
///
/// The chroma fields are all `Some` when chroma was requested and all `None` otherwise.
struct FlowVectorGeometry {
/// Number of block columns after padding to cover the frame width.
blk_x_padded: NonZeroUsize,
/// Number of block rows after padding to cover the frame height.
blk_y_padded: NonZeroUsize,
/// Luma height covered by `blk_y_padded` block rows.
height_padded: NonZeroUsize,
/// `height_padded` divided by the vertical chroma ratio.
height_padded_uv: Option<NonZeroUsize>,
/// Frame width divided by the horizontal chroma ratio.
width_uv: Option<NonZeroUsize>,
/// Frame height divided by the vertical chroma ratio.
height_uv: Option<NonZeroUsize>,
/// Padded luma width rounded up to a multiple of 16.
vector_pitch: NonZeroUsize,
/// Padded chroma width rounded up to a multiple of 16.
vector_pitch_uv: Option<NonZeroUsize>,
/// Resizer from the padded block grid to luma resolution.
upsizer: SimpleResize,
/// Resizer from the padded block grid to chroma resolution.
upsizer_uv: Option<SimpleResize>,
}
/// Computes the padded block count and padded extent along one axis.
///
/// The count is the smallest value, never below `blk`, for which overlapped blocks of
/// `blk_size` pixels span at least `extent` pixels. Returns `None` when `overlap` is not
/// smaller than `blk_size`, because the blocks would then never advance.
fn padded_axis(blk: usize, blk_size: usize, overlap: usize, extent: usize) -> Option<(usize, usize)> {
    let step = blk_size.checked_sub(overlap).filter(|&step| step != 0)?;
    // Smallest n with n * step + overlap >= extent, but never fewer than the analysed blocks.
    let blocks = blk.max(extent.saturating_sub(overlap).div_ceil(step));
    Some((blocks, blocks * step + overlap))
}

/// Derives padded block counts, padded plane sizes, vector pitches and resizers from `data`.
///
/// Chroma geometry is only computed when `include_chroma` is true. `filter_name` is used as
/// the prefix of error messages.
///
/// # Errors
///
/// Fails when the overlap is not smaller than the block size on either axis (previously this
/// underflowed or looped forever), or when any derived size is zero.
fn build_flow_vector_geometry(
    data: &MVAnalysisData,
    include_chroma: bool,
    filter_name: &str,
) -> Result<FlowVectorGeometry> {
    let error = || anyhow!("{filter_name}: wrong source or super clip frame size.");
    let overlap_error = || anyhow!("{filter_name}: overlap must be smaller than the block size.");

    let (blk_x_padded, width_padded) = padded_axis(
        data.blk_x.get(),
        data.blk_size_x.get(),
        data.overlap_x,
        data.width.get(),
    )
    .ok_or_else(overlap_error)?;
    let (blk_y_padded, height_padded) = padded_axis(
        data.blk_y.get(),
        data.blk_size_y.get(),
        data.overlap_y,
        data.height.get(),
    )
    .ok_or_else(overlap_error)?;

    let blk_x_padded = NonZeroUsize::new(blk_x_padded).ok_or_else(error)?;
    let blk_y_padded = NonZeroUsize::new(blk_y_padded).ok_or_else(error)?;
    let width_padded = NonZeroUsize::new(width_padded).ok_or_else(error)?;
    let height_padded = NonZeroUsize::new(height_padded).ok_or_else(error)?;
    // Vector rows are padded to a multiple of 16 elements.
    let vector_pitch = NonZeroUsize::new((width_padded.get() + 15) & !15).ok_or_else(error)?;
    let upsizer = SimpleResize::new(
        width_padded,
        height_padded,
        blk_x_padded,
        blk_y_padded,
        data.width,
        data.height,
        data.pel,
    );

    let (height_padded_uv, width_uv, height_uv, vector_pitch_uv, upsizer_uv) = if include_chroma {
        let x_ratio_uv = NonZeroUsize::from(data.x_ratio_uv);
        let y_ratio_uv = NonZeroUsize::from(data.y_ratio_uv);
        let width_padded_uv =
            NonZeroUsize::new(width_padded.get() / x_ratio_uv.get()).ok_or_else(error)?;
        let height_padded_uv =
            NonZeroUsize::new(height_padded.get() / y_ratio_uv.get()).ok_or_else(error)?;
        let width_uv = NonZeroUsize::new(data.width.get() / x_ratio_uv.get()).ok_or_else(error)?;
        let height_uv =
            NonZeroUsize::new(data.height.get() / y_ratio_uv.get()).ok_or_else(error)?;
        let vector_pitch_uv =
            NonZeroUsize::new((width_padded_uv.get() + 15) & !15).ok_or_else(error)?;
        // The chroma resizer starts from the same block grid as the luma one.
        let upsizer_uv = SimpleResize::new(
            width_padded_uv,
            height_padded_uv,
            blk_x_padded,
            blk_y_padded,
            width_uv,
            height_uv,
            data.pel,
        );
        (
            Some(height_padded_uv),
            Some(width_uv),
            Some(height_uv),
            Some(vector_pitch_uv),
            Some(upsizer_uv),
        )
    } else {
        (None, None, None, None, None)
    };

    Ok(FlowVectorGeometry {
        blk_x_padded,
        blk_y_padded,
        height_padded,
        height_padded_uv,
        width_uv,
        height_uv,
        vector_pitch,
        vector_pitch_uv,
        upsizer,
        upsizer_uv,
    })
}
/// Width of `plane`: the full width for plane 0, otherwise the width divided by the
/// horizontal chroma ratio.
fn plane_width(data: &MVAnalysisData, plane: usize) -> usize {
    let luma_width = data.width.get();
    match plane {
        0 => luma_width,
        _ => luma_width / NonZeroUsize::from(data.x_ratio_uv),
    }
}
/// Height of `plane`: the full height for plane 0, otherwise the height divided by the
/// vertical chroma ratio.
fn plane_height(data: &MVAnalysisData, plane: usize) -> usize {
    let luma_height = data.height.get();
    match plane {
        0 => luma_height,
        _ => luma_height / NonZeroUsize::from(data.y_ratio_uv),
    }
}
/// Copies the per-block vectors of level 0 into two block-resolution buffers.
///
/// Both returned buffers hold `blk_x_padded * blk_y_padded` entries laid out with a row
/// stride of `blk_x_padded`; cells outside the `blk_x` x `blk_y` area stay zero. Vector
/// components are narrowed to `i16` with `as`.
fn make_vector_small_masks(
    fake_gop: &FakeGroupOfPlanes,
    blk_x: usize,
    blk_y: usize,
    blk_x_padded: usize,
    blk_y_padded: usize,
) -> (Vec<i16>, Vec<i16>) {
    let cell_count = blk_x_padded * blk_y_padded;
    let mut vx = vec![0; cell_count];
    let mut vy = vec![0; cell_count];
    for by in 0..blk_y {
        // Source blocks are packed with stride `blk_x`, destination rows with the padded stride.
        let src_row = by * blk_x;
        let dst_row = by * blk_x_padded;
        for bx in 0..blk_x {
            let block = fake_gop.get_block(0, bx + src_row);
            let cell = bx + dst_row;
            *semisafe_get_mut(&mut vx, cell) = block.vector.x as i16;
            *semisafe_get_mut(&mut vy, cell) = block.vector.y as i16;
        }
    }
    (vx, vy)
}
/// Fills the padded right columns and bottom rows of block-resolution vector buffers.
///
/// Right padding copies the last real column of each row, with the horizontal component
/// limited to at most 0. Bottom padding copies the last real row of each column (including
/// the freshly padded columns), with the vertical component limited to at most 0.
pub(crate) fn check_and_pad_small_vectors(
    vx_small: &mut [i16],
    vy_small: &mut [i16],
    blk_x_padded: usize,
    blk_y_padded: usize,
    blk_x: usize,
    blk_y: usize,
) {
    // Extend every real row to the right.
    if blk_x < blk_x_padded {
        for row in 0..blk_y {
            let row_start = row * blk_x_padded;
            let last_real = row_start + blk_x - 1;
            let fill_vx = (*semisafe_get(vx_small, last_real)).min(0);
            let fill_vy = *semisafe_get(vy_small, last_real);
            for col in blk_x..blk_x_padded {
                let cell = row_start + col;
                *semisafe_get_mut(vx_small, cell) = fill_vx;
                *semisafe_get_mut(vy_small, cell) = fill_vy;
            }
        }
    }
    // Extend every column (padded ones included) downwards.
    if blk_y < blk_y_padded {
        for col in 0..blk_x_padded {
            let last_real = blk_x_padded * (blk_y - 1) + col;
            let fill_vx = *semisafe_get(vx_small, last_real);
            let fill_vy = (*semisafe_get(vy_small, last_real)).min(0);
            for row in blk_y..blk_y_padded {
                let cell = blk_x_padded * row + col;
                *semisafe_get_mut(vx_small, cell) = fill_vx;
                *semisafe_get_mut(vy_small, cell) = fill_vy;
            }
        }
    }
}
/// Converts luma vector components to chroma scale.
///
/// With `ratio_uv == 2` every component is halved with an arithmetic right shift (rounding
/// toward negative infinity); for any other ratio the input is copied unchanged.
pub(crate) fn vector_small_mask_y_to_uv(v_small_y: &[i16], ratio_uv: usize) -> Vec<i16> {
    if ratio_uv == 2 {
        v_small_y.iter().map(|&component| component >> 1).collect()
    } else {
        v_small_y.to_vec()
    }
}
/// Upsizes block-resolution horizontal and vertical vector components with `upsizer`.
///
/// The horizontal buffer is resized with the trailing flag set to `true`, the vertical one
/// with `false`; `blk_stride` is the row stride of the small buffers and `vec_pitch` the row
/// pitch of the results.
pub(crate) fn resize_vector_pair(
    upsizer: &SimpleResize,
    blk_stride: NonZeroUsize,
    vec_pitch: NonZeroUsize,
    vx_small: &[i16],
    vy_small: &[i16],
) -> ResizedVectorPair {
    ResizedVectorPair {
        vx_full: upsizer.resize_i16_to_vec(vec_pitch, vx_small, blk_stride, true),
        vy_full: upsizer.resize_i16_to_vec(vec_pitch, vy_small, blk_stride, false),
    }
}
/// Upsizes the block-resolution vectors and occlusion mask of one direction.
///
/// `_height` is accepted for interface compatibility and is not used.
pub(crate) fn resize_interpolation_field(
    upsizer: &SimpleResize,
    blk_stride: NonZeroUsize,
    vec_pitch: NonZeroUsize,
    _height: NonZeroUsize,
    vx_small: &[i16],
    vy_small: &[i16],
    mask_small: &[u8],
) -> InterpolationField {
    InterpolationField {
        vectors: resize_vector_pair(upsizer, blk_stride, vec_pitch, vx_small, vy_small),
        mask_full: upsizer.resize_u8_to_vec(vec_pitch, mask_small, blk_stride, false),
    }
}
/// Dispatches one plane to the kernel selected by `mode` and `extra_vectors`.
///
/// `Simple` always uses `flow_inter_simple`. `Regular` and `Extra` use `flow_inter_extra`
/// when extra vectors are present and `flow_inter` otherwise.
#[inline]
fn render_interpolation_plane<T: Pixel>(
    mode: FlowInterpolationMode,
    extra_vectors: Option<&(ResizedVectorPair, ResizedVectorPair)>,
    dst: &mut [T],
    dst_stride: usize,
    backward: &[T],
    backward_stride: usize,
    backward_origin_offset: isize,
    forward: &[T],
    forward_stride: usize,
    forward_origin_offset: isize,
    field_b: &InterpolationField,
    field_f: &InterpolationField,
    vector_pitch: usize,
    width: usize,
    height: usize,
    time256: i32,
    pel: Subpel,
) {
    let (vx_b, vy_b) = (&field_b.vectors.vx_full, &field_b.vectors.vy_full);
    let (vx_f, vy_f) = (&field_f.vectors.vx_full, &field_f.vectors.vy_full);
    let (occlusion_b, occlusion_f) = (&field_b.mask_full, &field_f.mask_full);

    match (mode, extra_vectors) {
        (FlowInterpolationMode::Simple, _) => flow_inter_simple(
            dst,
            dst_stride,
            backward,
            backward_stride,
            backward_origin_offset,
            forward,
            forward_stride,
            forward_origin_offset,
            vx_b,
            vx_f,
            vy_b,
            vy_f,
            occlusion_b,
            occlusion_f,
            vector_pitch,
            width,
            height,
            time256,
            pel,
        ),
        (FlowInterpolationMode::Regular | FlowInterpolationMode::Extra, None) => flow_inter(
            dst,
            dst_stride,
            backward,
            backward_stride,
            backward_origin_offset,
            forward,
            forward_stride,
            forward_origin_offset,
            vx_b,
            vx_f,
            vy_b,
            vy_f,
            occlusion_b,
            occlusion_f,
            vector_pitch,
            width,
            height,
            time256,
            pel,
        ),
        (
            FlowInterpolationMode::Regular | FlowInterpolationMode::Extra,
            Some((vectors_bb, vectors_ff)),
        ) => flow_inter_extra(
            dst,
            dst_stride,
            backward,
            backward_stride,
            backward_origin_offset,
            forward,
            forward_stride,
            forward_origin_offset,
            vx_b,
            vx_f,
            vy_b,
            vy_f,
            occlusion_b,
            occlusion_f,
            vector_pitch,
            width,
            height,
            time256,
            pel,
            &vectors_bb.vx_full,
            &vectors_ff.vx_full,
            &vectors_bb.vy_full,
            &vectors_ff.vy_full,
        ),
    }
}
/// Reads one sample from a reference plane at pixel `(x, y)` displaced by `(vx, vy)`.
///
/// The pixel position is scaled by `pel.log()` bits, while the displacement is applied
/// unscaled; `origin_offset` is added to the resulting index.
///
/// # Panics
///
/// Panics when the computed index is negative.
#[inline]
fn sample_plane<T: Pixel>(
    plane: &[T],
    stride: usize,
    origin_offset: isize,
    x: usize,
    y: usize,
    vx: i32,
    vy: i32,
    pel: Subpel,
) -> i32 {
    let shift = pel.log();
    let row_term = ((y * stride) << shift) as isize;
    let col_term = (x << shift) as isize;
    let vertical_term = vy as isize * stride as isize;
    let signed_index = origin_offset + row_term + col_term + vertical_term + vx as isize;
    let index =
        usize::try_from(signed_index).expect("flow sample index must stay in padded plane");
    let sample = *semisafe_get(plane, index);
    <T as num_traits::AsPrimitive<i32>>::as_(sample)
}
/// Simple flow interpolation kernel.
///
/// For every pixel the forward reference is sampled along the forward vector scaled by
/// `time256 / 256` and the backward reference along the backward vector scaled by
/// `(256 - time256) / 256`. The two samples are mixed using both occlusion masks and the
/// time weight. `time256 == 128` takes a shortcut that halves the vectors and averages.
#[expect(
    clippy::too_many_arguments,
    reason = "kernel signature follows C implementation"
)]
#[doc(hidden)]
#[inline]
pub fn flow_inter_simple<T: Pixel>(
    dst: &mut [T],
    dst_stride: usize,
    backward: &[T],
    backward_stride: usize,
    backward_origin_offset: isize,
    forward: &[T],
    forward_stride: usize,
    forward_origin_offset: isize,
    vx_full_b: &[i16],
    vx_full_f: &[i16],
    vy_full_b: &[i16],
    vy_full_f: &[i16],
    mask_b: &[u8],
    mask_f: &[u8],
    vector_pitch: usize,
    width: usize,
    height: usize,
    time256: i32,
    pel: Subpel,
) {
    let fetch_f = |x: usize, y: usize, vx: i32, vy: i32| {
        sample_plane(forward, forward_stride, forward_origin_offset, x, y, vx, vy, pel)
    };
    let fetch_b = |x: usize, y: usize, vx: i32, vy: i32| {
        sample_plane(backward, backward_stride, backward_origin_offset, x, y, vx, vy, pel)
    };
    let halfway = time256 == 128;

    for y in 0..height {
        for x in 0..width {
            let vi = y * vector_pitch + x;
            let raw_vx_f = *semisafe_get(vx_full_f, vi) as i32;
            let raw_vy_f = *semisafe_get(vy_full_f, vi) as i32;
            let raw_vx_b = *semisafe_get(vx_full_b, vi) as i32;
            let raw_vy_b = *semisafe_get(vy_full_b, vi) as i32;

            let blended = if halfway {
                // Midpoint special case: both vectors are simply halved.
                let dst_f = fetch_f(x, y, raw_vx_f >> 1, raw_vy_f >> 1);
                let dst_b = fetch_b(x, y, raw_vx_b >> 1, raw_vy_b >> 1);
                let occ_f = *semisafe_get(mask_f, vi) as i32;
                let occ_b = *semisafe_get(mask_b, vi) as i32;
                (((dst_f + dst_b) << 8) + (dst_b - dst_f) * (occ_f - occ_b)) >> 9
            } else {
                let dst_f = fetch_f(x, y, (raw_vx_f * time256) >> 8, (raw_vy_f * time256) >> 8);
                let dst_b = fetch_b(
                    x,
                    y,
                    (raw_vx_b * (256 - time256)) >> 8,
                    (raw_vy_b * (256 - time256)) >> 8,
                );
                let occ_f = *semisafe_get(mask_f, vi) as i32;
                let occ_b = *semisafe_get(mask_b, vi) as i32;
                // Each side falls back to the opposite sample where its own mask is set.
                let side_f = (dst_f * (255 - occ_f) + dst_b * occ_f + 255) >> 8;
                let side_b = (dst_b * (255 - occ_b) + dst_f * occ_b + 255) >> 8;
                (side_f * (256 - time256) + side_b * time256) >> 8
            };
            *semisafe_get_mut(dst, y * dst_stride + x) = T::from_u32_or_max_value(blended as u32);
        }
    }
}
/// Regular flow interpolation kernel.
///
/// For every pixel this samples both references along their time-scaled vectors and also at
/// zero displacement. Each direction keeps its own compensated sample where its mask is
/// clear; where the mask is set it mixes in the opposite compensated sample and the
/// zero-displacement sample. The two directions are then blended by `time256`. Intermediate
/// math is done in `i64`.
#[expect(
    clippy::too_many_arguments,
    reason = "kernel signature follows C implementation"
)]
#[doc(hidden)]
#[inline]
pub fn flow_inter<T: Pixel>(
    dst: &mut [T],
    dst_stride: usize,
    backward: &[T],
    backward_stride: usize,
    backward_origin_offset: isize,
    forward: &[T],
    forward_stride: usize,
    forward_origin_offset: isize,
    vx_full_b: &[i16],
    vx_full_f: &[i16],
    vy_full_b: &[i16],
    vy_full_f: &[i16],
    mask_b: &[u8],
    mask_f: &[u8],
    vector_pitch: usize,
    width: usize,
    height: usize,
    time256: i32,
    pel: Subpel,
) {
    let fetch_f = |x: usize, y: usize, vx: i32, vy: i32| {
        sample_plane(forward, forward_stride, forward_origin_offset, x, y, vx, vy, pel) as i64
    };
    let fetch_b = |x: usize, y: usize, vx: i32, vy: i32| {
        sample_plane(backward, backward_stride, backward_origin_offset, x, y, vx, vy, pel) as i64
    };
    let weight_b = i64::from(time256);
    let weight_f = 256 - weight_b;

    for y in 0..height {
        for x in 0..width {
            let vi = y * vector_pitch + x;

            let vx_f = (*semisafe_get(vx_full_f, vi) as i32 * time256) >> 8;
            let vy_f = (*semisafe_get(vy_full_f, vi) as i32 * time256) >> 8;
            let dst_f = fetch_f(x, y, vx_f, vy_f);
            let dst_f0 = fetch_f(x, y, 0, 0);

            let vx_b = (*semisafe_get(vx_full_b, vi) as i32 * (256 - time256)) >> 8;
            let vy_b = (*semisafe_get(vy_full_b, vi) as i32 * (256 - time256)) >> 8;
            let dst_b = fetch_b(x, y, vx_b, vy_b);
            let dst_b0 = fetch_b(x, y, 0, 0);

            let occ_f = *semisafe_get(mask_f, vi) as i64;
            let occ_b = *semisafe_get(mask_b, vi) as i64;

            // Contribution of each compensated sample where its own mask is clear.
            let clear_f = dst_f * (256 - occ_f);
            let clear_b = dst_b * (256 - occ_b);

            let mixed_f = (clear_f + ((occ_f * (clear_b + occ_b * dst_f0) + 256) >> 8) + 256) >> 8;
            let mixed_b = (clear_b + ((occ_b * (clear_f + occ_f * dst_b0) + 256) >> 8) + 256) >> 8;

            let value = (((mixed_f * weight_f + mixed_b * weight_b) >> 8) - 1) as u32;
            *semisafe_get_mut(dst, y * dst_stride + x) = T::from_u32_or_max_value(value);
        }
    }
}
/// Flow interpolation kernel that uses a second pair of vector fields.
///
/// For every pixel the forward reference is sampled along both forward fields and the
/// backward reference along both backward fields, all time-scaled. The extra samples are
/// limited to the range spanned by the two primary samples and used where the occlusion
/// masks are set; the result is blended by `time256`.
#[expect(
    clippy::too_many_arguments,
    reason = "kernel signature follows C implementation"
)]
#[doc(hidden)]
#[inline]
pub fn flow_inter_extra<T: Pixel>(
    dst: &mut [T],
    dst_stride: usize,
    backward: &[T],
    backward_stride: usize,
    backward_origin_offset: isize,
    forward: &[T],
    forward_stride: usize,
    forward_origin_offset: isize,
    vx_full_b: &[i16],
    vx_full_f: &[i16],
    vy_full_b: &[i16],
    vy_full_f: &[i16],
    mask_b: &[u8],
    mask_f: &[u8],
    vector_pitch: usize,
    width: usize,
    height: usize,
    time256: i32,
    pel: Subpel,
    vx_full_bb: &[i16],
    vx_full_ff: &[i16],
    vy_full_bb: &[i16],
    vy_full_ff: &[i16],
) {
    let time_b = 256 - time256;
    // Samples the forward reference along a vector field scaled by `time256`.
    let fetch_f = |x: usize, y: usize, vi: usize, vx_field: &[i16], vy_field: &[i16]| {
        sample_plane(
            forward,
            forward_stride,
            forward_origin_offset,
            x,
            y,
            (*semisafe_get(vx_field, vi) as i32 * time256) >> 8,
            (*semisafe_get(vy_field, vi) as i32 * time256) >> 8,
            pel,
        )
    };
    // Samples the backward reference along a vector field scaled by `256 - time256`.
    let fetch_b = |x: usize, y: usize, vi: usize, vx_field: &[i16], vy_field: &[i16]| {
        sample_plane(
            backward,
            backward_stride,
            backward_origin_offset,
            x,
            y,
            (*semisafe_get(vx_field, vi) as i32 * time_b) >> 8,
            (*semisafe_get(vy_field, vi) as i32 * time_b) >> 8,
            pel,
        )
    };

    for y in 0..height {
        for x in 0..width {
            let vi = y * vector_pitch + x;
            let dst_f = fetch_f(x, y, vi, vx_full_f, vy_full_f);
            let dst_ff = fetch_f(x, y, vi, vx_full_ff, vy_full_ff);
            let dst_b = fetch_b(x, y, vi, vx_full_b, vy_full_b);
            let dst_bb = fetch_b(x, y, vi, vx_full_bb, vy_full_bb);

            // Median of (extra sample, dst_b, dst_f) for each extra sample.
            let lower = min(dst_b, dst_f);
            let upper = max(dst_b, dst_f);
            let median_bb = max(lower, min(dst_bb, upper));
            let median_ff = max(lower, min(dst_ff, upper));

            let occ_f = *semisafe_get(mask_f, vi) as i32;
            let occ_b = *semisafe_get(mask_b, vi) as i32;
            let side_f = (median_bb * occ_f + dst_f * (256 - occ_f) + 256) >> 8;
            let side_b = (median_ff * occ_b + dst_b * (256 - occ_b) + 256) >> 8;
            let value = ((side_f * (256 - time256) + side_b * time256) >> 8) - 1;
            *semisafe_get_mut(dst, y * dst_stride + x) = T::from_u32_or_max_value(value as u32);
        }
    }
}
#[cfg(feature = "bench")]
pub use flow_inter as bench_flow_inter;
#[cfg(feature = "bench")]
pub use flow_inter_extra as bench_flow_inter_extra;
#[cfg(feature = "bench")]
pub use flow_inter_simple as bench_flow_inter_simple;
/// Looks up the pitch of `plane` in a pitch tuple.
///
/// # Errors
///
/// Fails when `plane` is 1 or 2 and that entry is `None`, or when `plane` is greater than 2.
fn pitch_for_plane(pitch: PlaneSizeTuple, plane: usize) -> Result<NonZeroUsize> {
    // Every failure produces the same message, parameterised by the plane index.
    let unavailable = || anyhow!("requested plane {plane} is not available");
    match plane {
        0 => Ok(pitch.0),
        1 => pitch.1.ok_or_else(unavailable),
        2 => pitch.2.ok_or_else(unavailable),
        _ => Err(unavailable()),
    }
}
/// Offset of the first non-padding sample in a reference plane.
///
/// Both paddings are multiplied by the numeric value of `pel`; the vertical padding is
/// additionally multiplied by `stride`.
#[inline]
const fn reference_origin_offset(
    stride: NonZeroUsize,
    h_padding: usize,
    v_padding: usize,
    pel: Subpel,
) -> isize {
    let scale = pel as usize;
    let rows_skipped = stride.get() * v_padding * scale;
    let cols_skipped = h_padding * scale;
    (rows_skipped + cols_skipped) as isize
}