#[cfg(test)]
mod tests;
use std::{
mem::size_of,
num::{NonZeroU8, NonZeroUsize},
};
use anyhow::{Result, anyhow, bail, ensure};
use semisafe::slice::get as semisafe_get;
use semisafe::slice::get_mut as semisafe_get_mut;
use crate::{
analysis::MVAnalysisData,
fake::group_of_planes::FakeGroupOfPlanes,
filters::analyse::SuperClipInfo,
frame::{FramePlanesMut, FrameView, PlaneSizeTuple},
mv_gof::BorrowedSuperFrame,
mv_plane::MVPlane,
overlaps::{OverlapWindows, OverlapsFn, ToPixelsFn, select_overlaps, select_to_pixels},
params::MVPlaneSet,
resize::SimpleResize,
util::{
Pixel, blend_plane, check_and_pad_mask_small, make_sad_mask_time,
make_vector_occlusion_mask_time, median,
},
video::{ColorFamily, Resolution, SampleType, VideoInfo},
};
/// User-supplied tuning options consumed by [`BlockFPS::new`].
#[derive(Debug, Clone, Copy)]
pub struct BlockFPSOptions {
/// Blending mode selector. The constructor rejects anything outside `0..=8`.
/// Modes 0-2 blend without masks, modes 3-5 use vector-occlusion masks and
/// modes 6-8 use SAD masks (see `render_frame` / `result_block`).
pub mode: u8,
/// Mask scale parameter passed to the mask builders; the constructor
/// requires it to be greater than 0.
pub ml: f64,
}
/// Kernel function pointers chosen once in `select_functions` for the clip's
/// bit depth and block sizes.
struct BlockFPSFunctions {
/// Overlap accumulation kernels, one slot per plane (luma, then two chroma
/// slots built from the chroma block size).
overlaps: [OverlapsFn; 3],
/// Kernel that converts the overlap accumulation buffer into output pixels.
to_pixels: ToPixelsFn,
}
/// Mask geometry derived from the vector metadata by
/// `build_flow_vector_geometry`.
struct FlowVectorGeometry {
/// Horizontal block count, grown until the blocks cover the frame width.
blk_x_padded: NonZeroUsize,
/// Vertical block count, grown until the blocks cover the frame height.
blk_y_padded: NonZeroUsize,
/// Luma height covered by `blk_y_padded` blocks.
height_padded: NonZeroUsize,
/// `height_padded` divided by the vertical chroma ratio.
height_padded_uv: NonZeroUsize,
/// Frame width divided by the horizontal chroma ratio.
width_uv: NonZeroUsize,
/// Frame height divided by the vertical chroma ratio.
height_uv: NonZeroUsize,
/// Padded luma width rounded up to a multiple of 16.
vector_pitch: NonZeroUsize,
/// Padded chroma width rounded up to a multiple of 16.
vector_pitch_uv: NonZeroUsize,
/// Resizer from the block grid to the padded luma size.
upsizer: SimpleResize,
/// Resizer from the block grid to the padded chroma size; `None` when
/// chroma was not requested.
upsizer_uv: Option<SimpleResize>,
}
/// Block-based frame interpolator: holds the validated clip/vector metadata
/// plus everything precomputed by [`BlockFPS::new`] for `render_frame`.
pub struct BlockFPS {
/// Format and resolution of the input clip.
info: VideoInfo,
/// Metadata of the super clip (padding, pel, levels, processed planes).
super_info: SuperClipInfo,
/// Analysis metadata of the backward vectors; also used as the source of
/// the shared block geometry.
backward_data: MVAnalysisData,
/// Analysis metadata of the forward vectors.
forward_data: MVAnalysisData,
/// Blending mode, validated to be within `0..=8`.
mode: u8,
/// Mask scale parameter, validated to be greater than 0.
ml: f64,
/// Horizontal block count padded to cover the frame width.
blk_x_padded: NonZeroUsize,
/// Vertical block count padded to cover the frame height.
blk_y_padded: NonZeroUsize,
/// Luma height covered by the padded block grid.
height_padded: NonZeroUsize,
/// Chroma height covered by the padded block grid.
height_padded_uv: NonZeroUsize,
/// Chroma plane width.
width_uv: NonZeroUsize,
/// Chroma plane height.
height_uv: NonZeroUsize,
/// Row pitch of the full-size luma masks.
vector_pitch_y: NonZeroUsize,
/// Row pitch of the full-size chroma masks.
vector_pitch_uv: NonZeroUsize,
/// Resizer that expands block-grid masks to luma size.
upsizer: SimpleResize,
/// Resizer that expands block-grid masks to chroma size, when chroma is
/// processed.
upsizer_uv: Option<SimpleResize>,
/// Luma overlap windows; `None` when both overlaps are zero.
over_wins: Option<OverlapWindows>,
/// Chroma overlap windows; `None` without overlap or without chroma.
over_wins_uv: Option<OverlapWindows>,
/// Row pitch in bytes of the luma overlap accumulation buffer.
dst_temp_pitch_bytes: NonZeroUsize,
/// Row pitch in bytes of the chroma overlap accumulation buffer.
dst_temp_pitch_bytes_uv: NonZeroUsize,
/// Row pitch in bytes of the per-block scratch buffer.
blk_pitch_bytes: NonZeroUsize,
/// Kernels selected for the clip's bit depth and block sizes.
functions: BlockFPSFunctions,
}
impl BlockFPS {
#[inline]
pub fn new(
info: VideoInfo,
actual_super_resolution: Resolution,
super_info: SuperClipInfo,
backward_data: MVAnalysisData,
forward_data: MVAnalysisData,
options: BlockFPSOptions,
) -> Result<Self> {
ensure!(
(0..=8).contains(&options.mode),
"BlockFPS: mode must be between 0 and 8 (inclusive)."
);
ensure!(options.ml > 0.0, "BlockFPS: ml must be greater than 0.");
validate_standard_input(info, "BlockFPS")?;
validate_vector_pair(&backward_data, &forward_data, "BlockFPS")?;
validate_geometry(
info,
actual_super_resolution,
super_info,
backward_data,
"BlockFPS",
)?;
let geometry = build_flow_vector_geometry(
&backward_data,
super_info.mode_yuv.contains(MVPlaneSet::UVPLANES),
"BlockFPS",
)?;
let x_ratio_uv = NonZeroUsize::from(backward_data.x_ratio_uv);
let y_ratio_uv = NonZeroUsize::from(backward_data.y_ratio_uv);
let blk_size_uv_x = NonZeroUsize::new(backward_data.blk_size_x.get() / x_ratio_uv.get())
.ok_or_else(|| anyhow!("BlockFPS: wrong source or super clip frame size."))?;
let blk_size_uv_y = NonZeroUsize::new(backward_data.blk_size_y.get() / y_ratio_uv.get())
.ok_or_else(|| anyhow!("BlockFPS: wrong source or super clip frame size."))?;
let (over_wins, over_wins_uv) =
if backward_data.overlap_x > 0 || backward_data.overlap_y > 0 {
let over_wins = Some(OverlapWindows::new(
backward_data.blk_size_x,
backward_data.blk_size_y,
backward_data.overlap_x,
backward_data.overlap_y,
));
let over_wins_uv = super_info.mode_yuv.contains(MVPlaneSet::UVPLANES).then(|| {
OverlapWindows::new(
blk_size_uv_x,
blk_size_uv_y,
backward_data.overlap_x / x_ratio_uv.get(),
backward_data.overlap_y / y_ratio_uv.get(),
)
});
(over_wins, over_wins_uv)
} else {
(None, None)
};
let bytes_per_sample = info.format.bytes_per_sample.get() as usize;
let dst_temp_pitch_bytes =
NonZeroUsize::new(((backward_data.width.get() + 15) / 16) * 16 * bytes_per_sample * 2)
.ok_or_else(|| anyhow!("BlockFPS: wrong source or super clip frame size."))?;
let dst_temp_pitch_bytes_uv = NonZeroUsize::new(
(((backward_data.width.get() / x_ratio_uv.get()) + 15) / 16)
* 16
* bytes_per_sample
* 2,
)
.ok_or_else(|| anyhow!("BlockFPS: wrong source or super clip frame size."))?;
let blk_pitch_bytes =
NonZeroUsize::new(((backward_data.blk_size_x.get() + 15) & !15) * bytes_per_sample)
.ok_or_else(|| anyhow!("BlockFPS: wrong source or super clip frame size."))?;
let functions = select_functions(
backward_data.blk_size_x,
backward_data.blk_size_y,
blk_size_uv_x,
blk_size_uv_y,
backward_data.bits_per_sample,
);
Ok(Self {
info,
super_info,
backward_data,
forward_data,
mode: options.mode,
ml: options.ml,
blk_x_padded: geometry.blk_x_padded,
blk_y_padded: geometry.blk_y_padded,
height_padded: geometry.height_padded,
height_padded_uv: geometry.height_padded_uv,
width_uv: geometry.width_uv,
height_uv: geometry.height_uv,
vector_pitch_y: geometry.vector_pitch,
vector_pitch_uv: geometry.vector_pitch_uv,
upsizer: geometry.upsizer,
upsizer_uv: geometry.upsizer_uv,
over_wins,
over_wins_uv,
dst_temp_pitch_bytes,
dst_temp_pitch_bytes_uv,
blk_pitch_bytes,
functions,
})
}
/// Returns `true` only when both vector sets report themselves usable for the
/// given scene-change thresholds. The backward set is queried first and the
/// forward set is skipped if it fails.
#[must_use]
#[inline]
pub fn vectors_are_usable(
    &self,
    backward: &FakeGroupOfPlanes,
    forward: &FakeGroupOfPlanes,
    thscd1: u64,
    thscd2: u64,
) -> bool {
    [backward, forward]
        .iter()
        .all(|vectors| vectors.is_usable(thscd1, thscd2))
}
/// Renders one interpolated frame into `output`.
///
/// `current` and `reference` are super-clip frames, `output_pitch` gives the
/// per-plane output pitch in pixels, and `time256` is the blend weight handed
/// to every kernel (presumably within `0..=256` — TODO confirm with callers).
///
/// # Errors
///
/// Propagates failures from building the borrowed super frames and from
/// plane or pitch lookups.
///
/// # Panics
///
/// Panics if a derived block extent is zero, or if the clip has more than one
/// plane while the chroma masks were not allocated (see the review note
/// below).
#[inline]
pub fn render_frame<T: Pixel>(
&self,
current: &FrameView<'_, T>,
reference: &FrameView<'_, T>,
output: &mut FramePlanesMut<'_, T>,
output_pitch: PlaneSizeTuple,
backward_vectors: &FakeGroupOfPlanes,
forward_vectors: &FakeGroupOfPlanes,
time256: i32,
) -> Result<()> {
// Wrap both input frames so their planes can be addressed with motion offsets.
let current_super = BorrowedSuperFrame::new(
current,
self.super_info.levels,
self.backward_data.width,
self.backward_data.height,
self.super_info.pel,
self.super_info.hpad,
self.super_info.vpad,
self.super_info.mode_yuv,
self.backward_data.x_ratio_uv,
self.backward_data.y_ratio_uv,
self.backward_data.bits_per_sample,
)?;
let reference_super = BorrowedSuperFrame::new(
reference,
self.super_info.levels,
self.backward_data.width,
self.backward_data.height,
self.super_info.pel,
self.super_info.hpad,
self.super_info.vpad,
self.super_info.mode_yuv,
self.backward_data.x_ratio_uv,
self.backward_data.y_ratio_uv,
self.backward_data.bits_per_sample,
)?;
// Full-size masks start zeroed; they stay zeroed for modes 0-2.
let mut mask_full_y_b = vec![0u8; self.height_padded.get() * self.vector_pitch_y.get()];
let mut mask_full_y_f = vec![0u8; self.height_padded.get() * self.vector_pitch_y.get()];
let mut mask_occ_y = vec![0u8; self.height_padded.get() * self.vector_pitch_y.get()];
// Chroma masks exist only when the super clip processes the UV planes.
let mut mask_full_uv_b = self
.super_info
.mode_yuv
.contains(MVPlaneSet::UVPLANES)
.then(|| vec![0u8; self.height_padded_uv.get() * self.vector_pitch_uv.get()]);
let mut mask_full_uv_f = self
.super_info
.mode_yuv
.contains(MVPlaneSet::UVPLANES)
.then(|| vec![0u8; self.height_padded_uv.get() * self.vector_pitch_uv.get()]);
let mut mask_occ_uv = self
.super_info
.mode_yuv
.contains(MVPlaneSet::UVPLANES)
.then(|| vec![0u8; self.height_padded_uv.get() * self.vector_pitch_uv.get()]);
// Modes 3-8 are mask driven: build block-grid masks and upsize them.
if (3..=8).contains(&self.mode) {
let sad_norm = 4.0
/ (self.ml
* self.backward_data.blk_size_x.get() as f64
* self.backward_data.blk_size_y.get() as f64);
// Modes 3-5 use vector-occlusion masks, modes 6-8 use SAD masks.
// The backward mask is built for the complementary time `256 - time256`.
let mut back = if self.mode <= 5 {
make_vector_occlusion_mask_time(
backward_vectors,
true,
self.backward_data.blk_x.get(),
self.backward_data.blk_y.get(),
self.ml,
1.0,
self.backward_data.pel,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
256 - time256,
self.backward_data.blk_size_x.get() - self.backward_data.overlap_x,
self.backward_data.blk_size_y.get() - self.backward_data.overlap_y,
)
} else {
make_sad_mask_time(
backward_vectors,
self.backward_data.blk_x.get(),
self.backward_data.blk_y.get(),
sad_norm,
1.0,
self.backward_data.pel,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
256 - time256,
self.backward_data.blk_size_x.get() - self.backward_data.overlap_x,
self.backward_data.blk_size_y.get() - self.backward_data.overlap_y,
self.backward_data.bits_per_sample.get(),
)
};
let mut forward = if self.mode <= 5 {
make_vector_occlusion_mask_time(
forward_vectors,
false,
self.forward_data.blk_x.get(),
self.forward_data.blk_y.get(),
self.ml,
1.0,
self.forward_data.pel,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
time256,
self.forward_data.blk_size_x.get() - self.forward_data.overlap_x,
self.forward_data.blk_size_y.get() - self.forward_data.overlap_y,
)
} else {
make_sad_mask_time(
forward_vectors,
self.forward_data.blk_x.get(),
self.forward_data.blk_y.get(),
sad_norm,
1.0,
self.forward_data.pel,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
time256,
self.forward_data.blk_size_x.get() - self.forward_data.overlap_x,
self.forward_data.blk_size_y.get() - self.forward_data.overlap_y,
self.forward_data.bits_per_sample.get(),
)
};
// Fill the padding cells of both block-grid masks.
check_and_pad_mask_small(
&mut back,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
self.backward_data.blk_x.get(),
self.backward_data.blk_y.get(),
);
check_and_pad_mask_small(
&mut forward,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
self.forward_data.blk_x.get(),
self.forward_data.blk_y.get(),
);
// Expand the block-grid masks to per-pixel luma masks.
self.upsizer.resize_u8(
&mut mask_full_y_b,
self.vector_pitch_y,
&back,
self.blk_x_padded,
false,
);
self.upsizer.resize_u8(
&mut mask_full_y_f,
self.vector_pitch_y,
&forward,
self.blk_x_padded,
false,
);
// Same expansion for chroma, when chroma masks and a chroma resizer exist.
if let (Some(mask_uv_b), Some(mask_uv_f), Some(upsizer_uv)) = (
mask_full_uv_b.as_mut(),
mask_full_uv_f.as_mut(),
self.upsizer_uv.as_ref(),
) {
upsizer_uv.resize_u8(
mask_uv_b,
self.vector_pitch_uv,
&back,
self.blk_x_padded,
false,
);
upsizer_uv.resize_u8(
mask_uv_f,
self.vector_pitch_uv,
&forward,
self.blk_x_padded,
false,
);
}
// Modes 4, 5, 7 and 8 also need the combined occlusion mask, which is the
// normalised product of the forward and backward block masks.
if matches!(self.mode, 4 | 5 | 7 | 8) {
let mut occ = vec![0u8; self.blk_x_padded.get() * self.blk_y_padded.get()];
mult_masks(
&forward,
&back,
&mut occ,
self.blk_x_padded.get(),
self.blk_y_padded.get(),
);
self.upsizer.resize_u8(
&mut mask_occ_y,
self.vector_pitch_y,
&occ,
self.blk_x_padded,
false,
);
if let (Some(mask_uv), Some(upsizer_uv)) =
(mask_occ_uv.as_mut(), self.upsizer_uv.as_ref())
{
upsizer_uv.resize_u8(
mask_uv,
self.vector_pitch_uv,
&occ,
self.blk_x_padded,
false,
);
}
}
}
// ---- Luma plane ----
let src_plane_y = current_super.plane(0)?;
let ref_plane_y = reference_super.plane(0)?;
let dst_plane_y = output.plane_mut(0)?;
let src_pitch_y = current.pitch_for_plane(0)?.get();
let ref_pitch_y = reference.pitch_for_plane(0)?.get();
let dst_pitch_y = pitch_for_plane(output_pitch, 0)?.get();
// Index of the first picture pixel inside the padded super-clip planes.
let src_origin_y = self.super_info.hpad + self.super_info.vpad * src_pitch_y;
let ref_origin_y = self.super_info.hpad + self.super_info.vpad * ref_pitch_y;
// Extent of the area actually covered by the (possibly overlapping) blocks.
let width_b = NonZeroUsize::new(
self.backward_data.blk_x.get()
* (self.backward_data.blk_size_x.get() - self.backward_data.overlap_x)
+ self.backward_data.overlap_x,
)
.expect("non-zero width_b");
let height_b = NonZeroUsize::new(
self.backward_data.blk_y.get()
* (self.backward_data.blk_size_y.get() - self.backward_data.overlap_y)
+ self.backward_data.overlap_y,
)
.expect("non-zero height_b");
if self.backward_data.overlap_x == 0 && self.backward_data.overlap_y == 0 {
// Without overlap each block is written straight into the output plane.
self.plane_loop_no_overlap::<T, 0>(
dst_plane_y,
dst_pitch_y,
src_plane_y,
src_origin_y,
src_pitch_y,
current_super.mv_plane(0),
ref_plane_y,
ref_origin_y,
ref_pitch_y,
reference_super.mv_plane(0),
&mask_full_y_b,
&mask_full_y_f,
&mask_occ_y,
time256,
backward_vectors,
forward_vectors,
);
} else {
// Blend the strip to the right of the block-covered area...
blend_plane(
semisafe_get_mut(dst_plane_y, width_b.get()..),
semisafe_get(src_plane_y, src_origin_y + width_b.get()..),
semisafe_get(ref_plane_y, ref_origin_y + width_b.get()..),
dst_pitch_y,
src_pitch_y,
ref_pitch_y,
self.backward_data.width.get() - width_b.get(),
height_b.get(),
time256,
);
// ...and the strip below it, across the full frame width.
blend_plane(
semisafe_get_mut(dst_plane_y, height_b.get() * dst_pitch_y..),
semisafe_get(src_plane_y, src_origin_y + height_b.get() * src_pitch_y..),
semisafe_get(ref_plane_y, ref_origin_y + height_b.get() * ref_pitch_y..),
dst_pitch_y,
src_pitch_y,
ref_pitch_y,
self.backward_data.width.get(),
self.backward_data.height.get() - height_b.get(),
time256,
);
// The overlap loop takes the output pitch in bytes rather than pixels.
self.plane_loop_overlap::<T, 0>(
dst_plane_y,
pitch_to_bytes::<T>(pitch_for_plane(output_pitch, 0)?),
src_plane_y,
src_origin_y,
current_super.mv_plane(0),
ref_plane_y,
ref_origin_y,
reference_super.mv_plane(0),
&mask_full_y_b,
&mask_full_y_f,
&mask_occ_y,
time256,
backward_vectors,
forward_vectors,
width_b,
height_b,
);
}
// ---- Chroma planes ----
// NOTE(review): this branch is gated on the clip's plane count, while the
// chroma masks above are gated on `mode_yuv` containing `UVPLANES`. If the
// two can disagree, the `expect` calls below panic — confirm the invariant.
if self.info.format.plane_count() > 1 {
let width_b_uv =
NonZeroUsize::new(width_b.get() / self.backward_data.x_ratio_uv.get() as usize)
.expect("non-zero uv width_b");
let height_b_uv =
NonZeroUsize::new(height_b.get() / self.backward_data.y_ratio_uv.get() as usize)
.expect("non-zero uv height_b");
for plane in [1_usize, 2] {
let src_plane_uv = current_super.plane(plane)?;
let ref_plane_uv = reference_super.plane(plane)?;
let dst_plane_uv = output.plane_mut(plane)?;
let src_pitch_uv = current.pitch_for_plane(plane)?.get();
let ref_pitch_uv = reference.pitch_for_plane(plane)?.get();
let dst_pitch_uv = pitch_for_plane(output_pitch, plane)?.get();
// Chroma origin: the super-clip padding scaled down by the chroma ratios.
let src_origin_uv = self.super_info.hpad
/ self.backward_data.x_ratio_uv.get() as usize
+ (self.super_info.vpad / self.backward_data.y_ratio_uv.get() as usize)
* src_pitch_uv;
let ref_origin_uv = self.super_info.hpad
/ self.backward_data.x_ratio_uv.get() as usize
+ (self.super_info.vpad / self.backward_data.y_ratio_uv.get() as usize)
* ref_pitch_uv;
let mask_b_uv = mask_full_uv_b
.as_deref()
.expect("uv mask exists when uv planes are present");
let mask_f_uv = mask_full_uv_f
.as_deref()
.expect("uv mask exists when uv planes are present");
let mask_o_uv = mask_occ_uv
.as_deref()
.expect("uv mask exists when uv planes are present");
if self.backward_data.overlap_x == 0 && self.backward_data.overlap_y == 0 {
self.plane_loop_no_overlap::<T, 1>(
dst_plane_uv,
dst_pitch_uv,
src_plane_uv,
src_origin_uv,
src_pitch_uv,
current_super.mv_plane(plane),
ref_plane_uv,
ref_origin_uv,
ref_pitch_uv,
reference_super.mv_plane(plane),
mask_b_uv,
mask_f_uv,
mask_o_uv,
time256,
backward_vectors,
forward_vectors,
);
} else {
// Same right-strip / bottom-strip blending as for luma, in chroma units.
blend_plane(
semisafe_get_mut(dst_plane_uv, width_b_uv.get()..),
semisafe_get(src_plane_uv, src_origin_uv + width_b_uv.get()..),
semisafe_get(ref_plane_uv, ref_origin_uv + width_b_uv.get()..),
dst_pitch_uv,
src_pitch_uv,
ref_pitch_uv,
self.width_uv.get() - width_b_uv.get(),
height_b_uv.get(),
time256,
);
blend_plane(
semisafe_get_mut(dst_plane_uv, height_b_uv.get() * dst_pitch_uv..),
semisafe_get(
src_plane_uv,
src_origin_uv + height_b_uv.get() * src_pitch_uv..,
),
semisafe_get(
ref_plane_uv,
ref_origin_uv + height_b_uv.get() * ref_pitch_uv..,
),
dst_pitch_uv,
src_pitch_uv,
ref_pitch_uv,
self.width_uv.get(),
self.height_uv.get() - height_b_uv.get(),
time256,
);
self.plane_loop_overlap::<T, 1>(
dst_plane_uv,
pitch_to_bytes::<T>(pitch_for_plane(output_pitch, plane)?),
src_plane_uv,
src_origin_uv,
current_super.mv_plane(plane),
ref_plane_uv,
ref_origin_uv,
reference_super.mv_plane(plane),
mask_b_uv,
mask_f_uv,
mask_o_uv,
time256,
backward_vectors,
forward_vectors,
width_b_uv,
height_b_uv,
);
}
}
}
Ok(())
}
/// Renders one plane block by block when the blocks do not overlap.
///
/// `PLANE == 0` selects the luma geometry; any other value selects the chroma
/// geometry (sizes divided by the chroma ratios). Each block is produced by
/// `result_block` and written directly into `dst_plane`; pixels right of and
/// below the block grid are filled with `blend_plane`.
#[expect(
    clippy::too_many_arguments,
    reason = "block loop needs all data slices"
)]
fn plane_loop_no_overlap<T: Pixel, const PLANE: usize>(
    &self,
    dst_plane: &mut [T],
    dst_stride: usize,
    src_plane_data: &[T],
    src_origin: usize,
    src_pitch: usize,
    src_mv_plane: &MVPlane,
    ref_plane_data: &[T],
    ref_origin: usize,
    ref_pitch: usize,
    ref_mv_plane: &MVPlane,
    mask_b: &[u8],
    mask_f: &[u8],
    mask_occ: &[u8],
    time256: i32,
    fgop_b: &FakeGroupOfPlanes,
    fgop_f: &FakeGroupOfPlanes,
) {
    let data = &self.backward_data;

    // Plane-dependent geometry: luma uses the raw values, chroma the subsampled ones.
    let (x_ratio, y_ratio) = if PLANE == 0 {
        (1, 1)
    } else {
        (data.x_ratio_uv.get() as i32, data.y_ratio_uv.get() as i32)
    };
    let (width, height, mask_pitch) = if PLANE == 0 {
        (data.width.get(), data.height.get(), self.vector_pitch_y.get())
    } else {
        (
            self.width_uv.get(),
            self.height_uv.get(),
            self.vector_pitch_uv.get(),
        )
    };
    let (blk_size_x, blk_size_y) = if PLANE == 0 {
        (data.blk_size_x.get(), data.blk_size_y.get())
    } else {
        (
            data.blk_size_x.get() / data.x_ratio_uv.get() as usize,
            data.blk_size_y.get() / data.y_ratio_uv.get() as usize,
        )
    };

    let blocks_x = data.blk_x.get();
    let blocks_y = data.blk_y.get();
    let pel = self.super_info.pel as i32;
    let backward_weight = 256 - time256;
    let forward_weight = time256;

    // Block coordinate in pel units, moved along the time-scaled vector and
    // reduced to this plane's resolution.
    let project = |origin: i32, vector: i32, weight: i32, ratio: i32| {
        (origin * pel + ((vector * weight) >> 8)) / ratio
    };

    for row in 0..blocks_y {
        let top = row * blk_size_y;
        for col in 0..blocks_x {
            let left = col * blk_size_x;
            let block_index = row * blocks_x + col;
            let block_b = fgop_b.get_block(0, block_index);
            let block_f = fgop_f.get_block(0, block_index);

            let dst_offset = top * dst_stride + left;
            let src_offset = src_origin + top * src_pitch + left;
            let ref_offset = ref_origin + top * ref_pitch + left;
            let mask_offset = top * mask_pitch + left;

            let mcb_offset = ref_mv_plane.get_pix_offset(
                project(block_b.x, block_b.vector.x, backward_weight, x_ratio),
                project(block_b.y, block_b.vector.y, backward_weight, y_ratio),
            );
            let mcf_offset = src_mv_plane.get_pix_offset(
                project(block_f.x, block_f.vector.x, forward_weight, x_ratio),
                project(block_f.y, block_f.vector.y, forward_weight, y_ratio),
            );

            result_block(
                semisafe_get_mut(dst_plane, dst_offset..),
                dst_stride,
                semisafe_get(ref_plane_data, mcb_offset..),
                ref_pitch,
                semisafe_get(src_plane_data, mcf_offset..),
                src_pitch,
                semisafe_get(ref_plane_data, ref_offset..),
                ref_pitch,
                semisafe_get(src_plane_data, src_offset..),
                src_pitch,
                semisafe_get(mask_b, mask_offset..),
                mask_pitch,
                semisafe_get(mask_f, mask_offset..),
                semisafe_get(mask_occ, mask_offset..),
                blk_size_x,
                blk_size_y,
                time256,
                self.mode,
                data.bits_per_sample.get(),
            );
        }
    }

    // Columns to the right of the block grid get a plain blend.
    let covered_width = blocks_x * blk_size_x;
    if width > covered_width {
        blend_plane(
            semisafe_get_mut(dst_plane, covered_width..),
            semisafe_get(src_plane_data, src_origin + covered_width..),
            semisafe_get(ref_plane_data, ref_origin + covered_width..),
            dst_stride,
            src_pitch,
            ref_pitch,
            width - covered_width,
            height,
            time256,
        );
    }

    // Rows below the block grid get a plain blend across the full width.
    let covered_height = blocks_y * blk_size_y;
    if height > covered_height {
        blend_plane(
            semisafe_get_mut(dst_plane, covered_height * dst_stride..),
            semisafe_get(src_plane_data, src_origin + covered_height * src_pitch..),
            semisafe_get(ref_plane_data, ref_origin + covered_height * ref_pitch..),
            dst_stride,
            src_pitch,
            ref_pitch,
            width,
            height - covered_height,
            time256,
        );
    }
}
/// Renders one plane when blocks overlap.
///
/// Every block is rendered into a scratch buffer by `result_block`, then
/// accumulated into a temporary buffer by the overlap kernel using the window
/// that matches the block's position (first / middle / last row and column).
/// Finally the `to_pixels` kernel converts the accumulated `width_b` x
/// `height_b` area into `dst_plane`.
///
/// `PLANE == 0` selects the luma geometry; any other value selects the chroma
/// geometry.
///
/// # Panics
///
/// Panics if the overlap windows for the selected plane were not built, or if
/// the plane's block width is zero.
#[expect(
    clippy::too_many_arguments,
    reason = "overlap loop needs all data slices"
)]
fn plane_loop_overlap<T: Pixel, const PLANE: usize>(
    &self,
    dst_plane: &mut [T],
    dst_stride_bytes: NonZeroUsize,
    src_plane_data: &[T],
    src_origin: usize,
    src_mv_plane: &MVPlane,
    ref_plane_data: &[T],
    ref_origin: usize,
    ref_mv_plane: &MVPlane,
    mask_b: &[u8],
    mask_f: &[u8],
    mask_occ: &[u8],
    time256: i32,
    fgop_b: &FakeGroupOfPlanes,
    fgop_f: &FakeGroupOfPlanes,
    width_b: NonZeroUsize,
    height_b: NonZeroUsize,
) {
    let x_ratio = if PLANE == 0 {
        1
    } else {
        self.backward_data.x_ratio_uv.get() as i32
    };
    let y_ratio = if PLANE == 0 {
        1
    } else {
        self.backward_data.y_ratio_uv.get() as i32
    };
    let overlap_x = if PLANE == 0 {
        self.backward_data.overlap_x
    } else {
        self.backward_data.overlap_x / self.backward_data.x_ratio_uv.get() as usize
    };
    let overlap_y = if PLANE == 0 {
        self.backward_data.overlap_y
    } else {
        self.backward_data.overlap_y / self.backward_data.y_ratio_uv.get() as usize
    };
    let blk_size_x = if PLANE == 0 {
        self.backward_data.blk_size_x.get()
    } else {
        self.backward_data.blk_size_x.get() / self.backward_data.x_ratio_uv.get() as usize
    };
    let blk_size_y = if PLANE == 0 {
        self.backward_data.blk_size_y.get()
    } else {
        self.backward_data.blk_size_y.get() / self.backward_data.y_ratio_uv.get() as usize
    };
    let src_pitch = src_mv_plane.stride.get();
    let ref_pitch = ref_mv_plane.stride.get();
    let mask_pitch = if PLANE == 0 {
        self.vector_pitch_y.get()
    } else {
        self.vector_pitch_uv.get()
    };
    let dst_temp_stride_bytes = if PLANE == 0 {
        self.dst_temp_pitch_bytes
    } else {
        self.dst_temp_pitch_bytes_uv
    };
    // Accumulation buffer for the windowed blocks and scratch space for one block.
    let mut dst_temp = vec![0u8; height_b.get() * dst_temp_stride_bytes.get()];
    let mut tmp_block =
        vec![T::default(); self.blk_pitch_bytes.get() / size_of::<T>() * blk_size_y];
    let over_wins = if PLANE == 0 {
        self.over_wins.as_ref()
    } else {
        self.over_wins_uv.as_ref()
    }
    .expect("overlap windows available when overlap path active");

    let blocks_x = self.backward_data.blk_x.get();
    let blocks_y = self.backward_data.blk_y.get();
    let last_row = blocks_y - 1;
    let last_col = blocks_x - 1;

    let mut dst_temp_offset_bytes = 0;
    let mut src_offset = src_origin;
    let mut ref_offset = ref_origin;
    let mut mask_offset = 0;
    for by in 0..blocks_y {
        // Window row class: 0 for the first row, 6 for the last, 3 otherwise.
        // The last row wins when there is a single row, mirroring the column
        // rule below. Written as explicit comparisons because the arithmetic
        // form `((by + n - 3) / (n - 2)) * 3` divides by zero for n == 2 and
        // underflows for n == 1.
        let wby = if by == last_row {
            6
        } else if by == 0 {
            0
        } else {
            3
        };
        // Window column class: 0 for the first column, 2 for the last, 1 otherwise.
        let mut wbx = 0;
        let mut xx = 0;
        for bx in 0..blocks_x {
            if bx == last_col {
                wbx = 2;
            }
            let win_over = over_wins.get_window(wby + wbx);
            let i = by * blocks_x + bx;
            let block_b = fgop_b.get_block(0, i);
            let block_f = fgop_f.get_block(0, i);
            let mcb_offset = ref_mv_plane.get_pix_offset(
                (block_b.x * self.super_info.pel as i32
                    + ((block_b.vector.x * (256 - time256)) >> 8))
                    / x_ratio,
                (block_b.y * self.super_info.pel as i32
                    + ((block_b.vector.y * (256 - time256)) >> 8))
                    / y_ratio,
            );
            let mcf_offset = src_mv_plane.get_pix_offset(
                (block_f.x * self.super_info.pel as i32 + ((block_f.vector.x * time256) >> 8))
                    / x_ratio,
                (block_f.y * self.super_info.pel as i32 + ((block_f.vector.y * time256) >> 8))
                    / y_ratio,
            );
            result_block(
                &mut tmp_block,
                self.blk_pitch_bytes.get() / size_of::<T>(),
                semisafe_get(ref_plane_data, mcb_offset..),
                ref_pitch,
                semisafe_get(src_plane_data, mcf_offset..),
                src_pitch,
                semisafe_get(ref_plane_data, ref_offset + xx..),
                ref_pitch,
                semisafe_get(src_plane_data, src_offset + xx..),
                src_pitch,
                semisafe_get(mask_b, mask_offset + xx..),
                mask_pitch,
                semisafe_get(mask_f, mask_offset + xx..),
                semisafe_get(mask_occ, mask_offset + xx..),
                blk_size_x,
                blk_size_y,
                time256,
                self.mode,
                self.backward_data.bits_per_sample.get(),
            );
            // SAFETY: NOTE(review) — the kernel's contract lives with
            // `select_overlaps`. This call relies on `dst_temp` holding
            // `height_b` rows of `dst_temp_stride_bytes` bytes, on `tmp_block`
            // holding `blk_size_y` rows of `blk_pitch_bytes` bytes, and on
            // `win_over` matching the block size the kernel was selected for.
            // Confirm these are sufficient.
            unsafe {
                (*semisafe_get(&self.functions.overlaps, PLANE))(
                    semisafe_get_mut(
                        &mut dst_temp,
                        (dst_temp_offset_bytes + xx * size_of::<T>() * 2)..,
                    )
                    .as_mut_ptr(),
                    dst_temp_stride_bytes,
                    tmp_block.as_ptr().cast(),
                    self.blk_pitch_bytes,
                    win_over.as_ptr(),
                    NonZeroUsize::new(blk_size_x).expect("non-zero block width"),
                );
            }
            xx += blk_size_x - overlap_x;
            wbx = 1;
        }
        // Advance every cursor by one block row minus the vertical overlap.
        dst_temp_offset_bytes += dst_temp_stride_bytes.get() * (blk_size_y - overlap_y);
        src_offset += src_pitch * (blk_size_y - overlap_y);
        ref_offset += ref_pitch * (blk_size_y - overlap_y);
        mask_offset += mask_pitch * (blk_size_y - overlap_y);
    }
    // SAFETY: NOTE(review) — relies on `dst_plane` providing at least
    // `height_b` rows of `dst_stride_bytes` bytes and on `dst_temp` having been
    // sized above for `height_b` rows; confirm against `select_to_pixels`.
    unsafe {
        (self.functions.to_pixels)(
            dst_plane.as_mut_ptr().cast(),
            dst_stride_bytes,
            dst_temp.as_ptr(),
            dst_temp_stride_bytes,
            width_b,
            height_b,
            self.backward_data.bits_per_sample,
        );
    }
}
}
fn validate_standard_input(info: VideoInfo, filter_name: &str) -> Result<()> {
let format = info.format;
if format.bits_per_sample.get() > 16 {
bail!("{filter_name}: input clip must be 8-16 bits");
}
if format.sample_type != SampleType::Integer {
bail!("{filter_name}: input clip must be integer format");
}
if ![ColorFamily::Yuv, ColorFamily::Gray].contains(&format.color_family)
|| format.sub_sampling_w > 1
|| format.sub_sampling_h > 1
{
bail!("{filter_name}: input clip must be GRAY, 420, 422, 440, or 444");
}
Ok(())
}
fn validate_vector_pair(
backward_data: &MVAnalysisData,
forward_data: &MVAnalysisData,
filter_name: &str,
) -> Result<()> {
backward_data.check_similarity(forward_data, filter_name, "mvbw", "mvfw")?;
ensure!(
backward_data.delta_frame > 0 && forward_data.delta_frame > 0,
"{filter_name}: cannot use motion vectors with absolute frame references."
);
ensure!(
backward_data.delta_frame == forward_data.delta_frame,
"{filter_name}: mvbw and mvfw must be generated with the same delta."
);
ensure!(
backward_data.is_backward,
"{filter_name}: mvbw must be generated with isb=True."
);
ensure!(
!forward_data.is_backward,
"{filter_name}: mvfw must be generated with isb=False."
);
Ok(())
}
fn validate_geometry(
info: VideoInfo,
actual_super_resolution: Resolution,
super_info: SuperClipInfo,
vectors_data: MVAnalysisData,
filter_name: &str,
) -> Result<()> {
ensure!(
vectors_data.width.get() == info.resolution.width
&& vectors_data.height.get() == info.resolution.height,
"{filter_name}: wrong source or super clip frame size."
);
ensure!(
vectors_data.width.get() + vectors_data.h_padding * 2 == actual_super_resolution.width
&& vectors_data.height.get() + vectors_data.v_padding * 2
<= actual_super_resolution.height,
"{filter_name}: wrong source or super clip frame size."
);
let super_padding_matches =
(super_info.hpad, super_info.vpad) == (vectors_data.h_padding, vectors_data.v_padding);
ensure!(
super_info.height == vectors_data.height
&& super_padding_matches
&& super_info.pel == vectors_data.pel,
"{filter_name}: wrong source or super clip frame size."
);
let expected_x_ratio = 1usize << usize::from(info.format.sub_sampling_w);
let expected_y_ratio = 1usize << usize::from(info.format.sub_sampling_h);
ensure!(
vectors_data.x_ratio_uv.get() as usize == expected_x_ratio
&& vectors_data.y_ratio_uv.get() as usize == expected_y_ratio,
"{filter_name}: input clip subsampling does not match vector metadata."
);
Ok(())
}
/// Derives the padded block grid, mask pitches and mask resizers from the
/// vector metadata.
///
/// The block counts are grown (never shrunk) until
/// `count * (blk_size - overlap) + overlap` covers the frame in each
/// direction. `include_chroma` controls whether the chroma resizer is built.
///
/// # Errors
///
/// Returns `"{filter_name}: wrong source or super clip frame size."` when the
/// overlap is not smaller than the block size, or when any derived dimension
/// is zero.
fn build_flow_vector_geometry(
    data: &MVAnalysisData,
    include_chroma: bool,
    filter_name: &str,
) -> Result<FlowVectorGeometry> {
    /// Smallest count `>= blocks` such that `count * step + overlap >= extent`.
    fn padded_block_count(blocks: usize, step: usize, overlap: usize, extent: usize) -> usize {
        blocks.max(extent.saturating_sub(overlap).div_ceil(step))
    }

    let error = || anyhow!("{filter_name}: wrong source or super clip frame size.");

    // A block must advance by at least one pixel. A zero step would never
    // cover the frame and a negative one would underflow, so reject both.
    let step_x = data
        .blk_size_x
        .get()
        .checked_sub(data.overlap_x)
        .filter(|&step| step > 0)
        .ok_or_else(error)?;
    let step_y = data
        .blk_size_y
        .get()
        .checked_sub(data.overlap_y)
        .filter(|&step| step > 0)
        .ok_or_else(error)?;

    let blk_x_padded = NonZeroUsize::new(padded_block_count(
        data.blk_x.get(),
        step_x,
        data.overlap_x,
        data.width.get(),
    ))
    .ok_or_else(error)?;
    let blk_y_padded = NonZeroUsize::new(padded_block_count(
        data.blk_y.get(),
        step_y,
        data.overlap_y,
        data.height.get(),
    ))
    .ok_or_else(error)?;

    let width_padded =
        NonZeroUsize::new(blk_x_padded.get() * step_x + data.overlap_x).ok_or_else(error)?;
    let height_padded =
        NonZeroUsize::new(blk_y_padded.get() * step_y + data.overlap_y).ok_or_else(error)?;

    let x_ratio_uv = NonZeroUsize::from(data.x_ratio_uv);
    let y_ratio_uv = NonZeroUsize::from(data.y_ratio_uv);
    let width_padded_uv =
        NonZeroUsize::new(width_padded.get() / x_ratio_uv.get()).ok_or_else(error)?;
    let height_padded_uv =
        NonZeroUsize::new(height_padded.get() / y_ratio_uv.get()).ok_or_else(error)?;
    let width_uv = NonZeroUsize::new(data.width.get() / x_ratio_uv.get()).ok_or_else(error)?;
    let height_uv = NonZeroUsize::new(data.height.get() / y_ratio_uv.get()).ok_or_else(error)?;

    // Mask rows are padded to a multiple of 16 entries.
    let vector_pitch = NonZeroUsize::new((width_padded.get() + 15) & !15).ok_or_else(error)?;
    let vector_pitch_uv =
        NonZeroUsize::new((width_padded_uv.get() + 15) & !15).ok_or_else(error)?;

    let upsizer = SimpleResize::new(
        width_padded,
        height_padded,
        blk_x_padded,
        blk_y_padded,
        data.width,
        data.height,
        data.pel,
    );
    let upsizer_uv = include_chroma.then(|| {
        SimpleResize::new(
            width_padded_uv,
            height_padded_uv,
            blk_x_padded,
            blk_y_padded,
            width_uv,
            height_uv,
            data.pel,
        )
    });

    Ok(FlowVectorGeometry {
        blk_x_padded,
        blk_y_padded,
        height_padded,
        height_padded_uv,
        width_uv,
        height_uv,
        vector_pitch,
        vector_pitch_uv,
        upsizer,
        upsizer_uv,
    })
}
/// Picks the overlap and pixel-conversion kernels for the clip's sample width:
/// the `u8` variants for 8-bit clips and the `u16` variants for everything
/// else. The two chroma slots use the chroma block size.
fn select_functions(
    blk_size_x: NonZeroUsize,
    blk_size_y: NonZeroUsize,
    blk_size_uv_x: NonZeroUsize,
    blk_size_uv_y: NonZeroUsize,
    bits_per_sample: NonZeroU8,
) -> BlockFPSFunctions {
    match bits_per_sample.get() {
        8 => {
            let overlaps = [
                select_overlaps::<u8>(blk_size_x, blk_size_y),
                select_overlaps::<u8>(blk_size_uv_x, blk_size_uv_y),
                select_overlaps::<u8>(blk_size_uv_x, blk_size_uv_y),
            ];
            let to_pixels = select_to_pixels::<u8>();
            BlockFPSFunctions {
                overlaps,
                to_pixels,
            }
        }
        _ => {
            let overlaps = [
                select_overlaps::<u16>(blk_size_x, blk_size_y),
                select_overlaps::<u16>(blk_size_uv_x, blk_size_uv_y),
                select_overlaps::<u16>(blk_size_uv_x, blk_size_uv_y),
            ];
            let to_pixels = select_to_pixels::<u16>();
            BlockFPSFunctions {
                overlaps,
                to_pixels,
            }
        }
    }
}
/// Writes the normalised product `f * b / 255` of two block-grid masks into
/// `small_mask_o` for the first `blk_x * blk_y` cells, visited row by row.
fn mult_masks(
    small_mask_f: &[u8],
    small_mask_b: &[u8],
    small_mask_o: &mut [u8],
    blk_x: usize,
    blk_y: usize,
) {
    let cells = (0..blk_y).flat_map(|row| (0..blk_x).map(move |col| row * blk_x + col));
    for cell in cells {
        let forward = u16::from(*semisafe_get(small_mask_f, cell));
        let backward = u16::from(*semisafe_get(small_mask_b, cell));
        // 255 * 255 fits in u16, and the quotient is at most 255.
        *semisafe_get_mut(small_mask_o, cell) = (forward * backward / 255) as u8;
    }
}
/// Computes one output block pixel by pixel according to `mode`.
///
/// * `0` — time-weighted mix of the two motion-compensated blocks.
/// * `1` — median of the reference pixel, the source pixel and the mode-0 mix.
/// * `2` — median of the plain reference/source mix and the two
///   motion-compensated pixels.
/// * `3 | 6` — time-weighted mix of the mask-corrected backward and forward
///   compensations.
/// * `4 | 7` — the `3 | 6` result mixed with the plain reference/source
///   average according to the occlusion mask.
/// * `5 | 8` — the occlusion mask itself, scaled up to the clip's bit depth.
///
/// Every slice is addressed as `row * stride + column`. The three masks share
/// `mask_stride`.
///
/// # Panics
///
/// Panics on a mode outside `0..=8`, which the constructor rules out.
#[expect(clippy::too_many_arguments, reason = "port C ResultBlock signature")]
fn result_block<T: Pixel>(
    dst: &mut [T],
    dst_stride: usize,
    mcb: &[T],
    mcb_stride: usize,
    mcf: &[T],
    mcf_stride: usize,
    ref_plane: &[T],
    ref_stride: usize,
    src_plane: &[T],
    src_stride: usize,
    mask_b: &[u8],
    mask_stride: usize,
    mask_f: &[u8],
    mask_occ: &[u8],
    blk_size_x: usize,
    blk_size_y: usize,
    time256: i32,
    mode: u8,
    bits_per_sample: u8,
) {
    let widen = |px: T| <T as num_traits::AsPrimitive<u32>>::as_(px);
    // `first` is weighted by `time256`, `second` by the remainder; not yet shifted.
    let time_mix =
        |first: u32, second: u32| first * time256 as u32 + second * (256 - time256) as u32;
    // Mask-weighted mix of two pixels, rounded up before the shift.
    let mask_mix = |mask: u32, on: u32, off: u32| (mask * on + (255 - mask) * off + 255) >> 8;

    for row in 0..blk_size_y {
        for col in 0..blk_size_x {
            let out_at = row * dst_stride + col;
            let mcb_at = row * mcb_stride + col;
            let mcf_at = row * mcf_stride + col;
            let ref_at = row * ref_stride + col;
            let src_at = row * src_stride + col;
            let mask_at = row * mask_stride + col;

            let backward_px = widen(*semisafe_get(mcb, mcb_at));
            let forward_px = widen(*semisafe_get(mcf, mcf_at));

            let value = match mode {
                0 => time_mix(backward_px, forward_px) >> 8,
                1 => {
                    let compensated = time_mix(backward_px, forward_px) >> 8;
                    let ref_px = widen(*semisafe_get(ref_plane, ref_at));
                    let src_px = widen(*semisafe_get(src_plane, src_at));
                    median(ref_px, src_px, compensated)
                }
                2 => {
                    let ref_px = widen(*semisafe_get(ref_plane, ref_at));
                    let src_px = widen(*semisafe_get(src_plane, src_at));
                    let plain = time_mix(ref_px, src_px) >> 8;
                    median(plain, backward_px, forward_px)
                }
                3 | 6 => {
                    let weight_b = *semisafe_get(mask_b, mask_at) as u32;
                    let weight_f = *semisafe_get(mask_f, mask_at) as u32;
                    let corrected_b = mask_mix(weight_b, forward_px, backward_px);
                    let corrected_f = mask_mix(weight_f, backward_px, forward_px);
                    time_mix(corrected_b, corrected_f) >> 8
                }
                4 | 7 => {
                    let weight_b = *semisafe_get(mask_b, mask_at) as u32;
                    let weight_f = *semisafe_get(mask_f, mask_at) as u32;
                    let occlusion = *semisafe_get(mask_occ, mask_at) as u32;
                    let corrected_f = mask_mix(weight_f, backward_px, forward_px);
                    let corrected_b = mask_mix(weight_b, forward_px, backward_px);
                    let ref_px = widen(*semisafe_get(ref_plane, ref_at));
                    let src_px = widen(*semisafe_get(src_plane, src_at));
                    let plain = (time_mix(ref_px, src_px) + 255) >> 8;
                    let compensated = time_mix(corrected_b, corrected_f) >> 8;
                    mask_mix(occlusion, plain, compensated)
                }
                5 | 8 => (*semisafe_get(mask_occ, mask_at) as u32) << (bits_per_sample - 8),
                _ => unreachable!("mode validated in constructor"),
            };

            *semisafe_get_mut(dst, out_at) = T::from_u32_or_max_value(value);
        }
    }
}
/// Looks up the pitch of `plane` (0, 1 or 2) in a per-plane pitch tuple.
///
/// # Errors
///
/// Fails when the plane index is out of range or the tuple has no entry for
/// that plane.
fn pitch_for_plane(pitch: PlaneSizeTuple, plane: usize) -> Result<NonZeroUsize> {
    let entry = match plane {
        0 => Some(pitch.0),
        1 => pitch.1,
        2 => pitch.2,
        _ => None,
    };
    entry.ok_or_else(|| anyhow!("requested plane {plane} is not available"))
}
/// Converts a pitch expressed in pixels of type `T` into a pitch in bytes.
///
/// # Panics
///
/// Panics if `T` is zero-sized or if the byte pitch does not fit in `usize`.
/// The previous implementation used `new_unchecked` on an unchecked product,
/// which is undefined behaviour if the multiplication wraps to zero.
const fn pitch_to_bytes<T: Pixel>(pitch_pixels: NonZeroUsize) -> NonZeroUsize {
    let Some(sample_bytes) = NonZeroUsize::new(size_of::<T>()) else {
        panic!("pixel type must not be zero-sized");
    };
    match pitch_pixels.checked_mul(sample_bytes) {
        Some(pitch_bytes) => pitch_bytes,
        None => panic!("pitch in bytes overflows usize"),
    }
}