#[cfg(test)]
mod tests;
use std::{
cmp::{max, min},
num::NonZeroUsize,
};
use anyhow::{Result, anyhow, bail, ensure};
use safefma::Fma;
use semisafe::slice::get as semisafe_get;
use semisafe::slice::get_mut as semisafe_get_mut;
use semisafe::slice::split_at_mut as semisafe_split_at_mut;
use crate::{
analysis::MVAnalysisData,
fake::group_of_planes::FakeGroupOfPlanes,
frame::{FramePlanesMut, FrameView, PlaneSizeTuple},
mv::MotionVector,
params::{MaskKind, Subpel},
resize::SimpleResize,
util::vs_bitblt,
video::{ColorFamily, Resolution, SampleType, VideoInfo},
};
/// User-tunable settings for [`Mask`].
///
/// The ranges noted on each field are the ones enforced by `Mask::new`.
#[derive(Debug, Clone, Copy)]
pub struct MaskOptions {
/// Normalization scale; must be greater than 0. `Mask::new` stores its
/// reciprocal as the mask normalization factor.
pub ml: f32,
/// Exponent applied to the normalized mask values; must not be negative.
pub gamma: f32,
/// Which kind of mask to render.
pub kind: MaskKind,
/// Percentage in `0.0..=100.0`; `Mask::new` rescales it to a 0..=256 integer.
pub time: f64,
/// Value in `0..=255` written to the output when the vectors are not usable
/// (reported as `ysc` in the validation error message).
pub scene_change_value: i32,
}
impl Default for MaskOptions {
    /// Returns the default options: `ml = 100`, `gamma = 1`, the default
    /// mask kind, `time = 100` and a scene-change value of 0.
    #[inline]
    fn default() -> Self {
        let kind = MaskKind::default();
        Self {
            kind,
            scene_change_value: 0,
            time: 100.0,
            gamma: 1.0,
            ml: 100.0,
        }
    }
}
/// Renders motion masks from analysed motion vectors.
///
/// All fields are derived once in `Mask::new` and only read afterwards.
pub struct Mask {
// Format/resolution of the input clip, as validated by `Mask::new`.
info: VideoInfo,
// Motion analysis parameters (block layout, pel, frame size, ...).
vectors_data: MVAnalysisData,
// Scene-change thresholds after `scale_thscd`; passed to `is_usable` per frame.
thscd1: u64,
thscd2: u64,
// Copied from `MaskOptions::gamma`.
gamma: f32,
// Copied from `MaskOptions::kind`.
kind: MaskKind,
// `MaskOptions::time` rescaled from 0..=100 to 0..=256.
time256: i32,
// Value written to the output planes when the vectors are not usable.
scene_change_value: i32,
// `1 / ml`.
mask_norm_factor: f32,
// `mask_norm_factor` squared.
mask_norm_factor2: f32,
// `gamma * 0.5`.
half_gamma: f32,
// `blk_x * (blk_size_x - overlap_x) + overlap_x`.
width_b: NonZeroUsize,
// `width_b / x_ratio_uv`.
width_b_uv: NonZeroUsize,
// `blk_y * (blk_size_y - overlap_y) + overlap_y`.
height_b: NonZeroUsize,
// `height_b / y_ratio_uv`.
height_b_uv: NonZeroUsize,
// Resizer used to expand the per-block mask onto plane 0.
upsizer: SimpleResize,
// Resizer used to expand the per-block mask onto planes 1 and 2.
upsizer_uv: SimpleResize,
}
impl Mask {
#[inline]
pub fn new(
info: VideoInfo,
vectors_data: MVAnalysisData,
options: MaskOptions,
thscd1: Option<u64>,
thscd2: Option<u64>,
) -> Result<Self> {
ensure!(options.ml > 0.0, "Mask: ml must be greater than 0.");
ensure!(options.gamma >= 0.0, "Mask: gamma must not be negative.");
ensure!(
(0.0..=100.0).contains(&options.time),
"Mask: time must be between 0.0 and 100.0 (inclusive)."
);
ensure!(
(0..=255).contains(&options.scene_change_value),
"Mask: ysc must be between 0 and 255 (inclusive)."
);
if info.format.bits_per_sample.get() > 8
|| info.format.sample_type != SampleType::Integer
|| ![ColorFamily::Yuv, ColorFamily::Gray].contains(&info.format.color_family)
|| info.format.sub_sampling_w > 1
|| info.format.sub_sampling_h > 1
{
bail!(
"Mask: input clip must be GRAY8, YUV420P8, YUV422P8, YUV440P8, or YUV444P8, with constant dimensions."
);
}
ensure!(
info.resolution.width == vectors_data.width.get()
&& info.resolution.height == vectors_data.height.get(),
"Mask: wrong source or super clip frame size."
);
let mask_norm_factor = 1.0 / options.ml;
let mask_norm_factor2 = mask_norm_factor.powi(2);
let half_gamma = options.gamma * 0.5;
let mut thscd1 = thscd1.unwrap_or(crate::params::MV_DEFAULT_SCD1);
let mut thscd2 = thscd2.unwrap_or(crate::params::MV_DEFAULT_SCD2);
vectors_data.scale_thscd(&mut thscd1, &mut thscd2, "Mask")?;
let x_ratio_uv = vectors_data.x_ratio_uv.get() as usize;
let y_ratio_uv = vectors_data.y_ratio_uv.get() as usize;
let width_uv = NonZeroUsize::new(vectors_data.width.get() / x_ratio_uv)
.ok_or_else(|| anyhow!("Mask: wrong source or super clip frame size."))?;
let height_uv = NonZeroUsize::new(vectors_data.height.get() / y_ratio_uv)
.ok_or_else(|| anyhow!("Mask: wrong source or super clip frame size."))?;
let overlap_x = vectors_data.overlap_x;
let overlap_y = vectors_data.overlap_y;
let blk_size_x = vectors_data.blk_size_x;
let blk_size_y = vectors_data.blk_size_y;
let width_b = NonZeroUsize::new(
vectors_data.blk_x.get() * (blk_size_x.get() - overlap_x) + overlap_x,
)
.ok_or_else(|| anyhow!("Mask: wrong source or super clip frame size."))?;
let width_b_uv = NonZeroUsize::new(width_b.get() / x_ratio_uv)
.ok_or_else(|| anyhow!("Mask: wrong source or super clip frame size."))?;
let height_b = NonZeroUsize::new(
vectors_data.blk_y.get() * (blk_size_y.get() - overlap_y) + overlap_y,
)
.ok_or_else(|| anyhow!("Mask: wrong source or super clip frame size."))?;
let height_b_uv = NonZeroUsize::new(height_b.get() / y_ratio_uv)
.ok_or_else(|| anyhow!("Mask: wrong source or super clip frame size."))?;
Ok(Self {
info,
vectors_data,
thscd1,
thscd2,
gamma: options.gamma,
kind: options.kind,
time256: (options.time * 256.0 / 100.0) as i32,
scene_change_value: options.scene_change_value,
mask_norm_factor,
mask_norm_factor2,
half_gamma,
width_b,
width_b_uv,
height_b,
height_b_uv,
upsizer: SimpleResize::new(
width_b,
height_b,
vectors_data.blk_x,
vectors_data.blk_y,
vectors_data.width,
vectors_data.height,
vectors_data.pel,
),
upsizer_uv: SimpleResize::new(
width_b_uv,
height_b_uv,
vectors_data.blk_x,
vectors_data.blk_y,
width_uv,
height_uv,
vectors_data.pel,
),
})
}
/// Returns the resolution of rendered frames, taken from the width and
/// height stored in the vector analysis data.
#[must_use]
#[inline]
pub const fn output_resolution(&self) -> Resolution {
    let width = self.vectors_data.width.get();
    let height = self.vectors_data.height.get();
    Resolution { width, height }
}
/// Renders the mask for one frame into `output`.
///
/// When `vectors` is not usable for the configured thresholds, plane 0 is
/// filled with the scene-change value (or copied from `src` for
/// `MaskKind::MotionColormap`) and planes 1 and 2, if present, are filled
/// with the scene-change value.
///
/// Otherwise a per-block mask is built according to `self.kind`, resized
/// onto each plane and extended to the frame edges. For
/// `MaskKind::MotionColormap` plane 0 is copied from `src`, plane 1
/// receives the horizontal component and plane 2 the vertical component.
///
/// # Errors
///
/// Returns an error when a required plane or pitch is unavailable.
#[inline]
pub fn render_frame(
    &self,
    src: &FrameView<'_, u8>,
    output: &mut FramePlanesMut<'_, u8>,
    output_pitch: PlaneSizeTuple,
    vectors: &FakeGroupOfPlanes,
) -> Result<()> {
    let has_chroma = self.info.format.plane_count() > 1;
    let luma_stride = pitch_for_plane(output_pitch, 0)?;
    let data = &self.vectors_data;
    let width = data.width;
    let height = data.height;
    let width_uv = NonZeroUsize::new(data.width.get() / data.x_ratio_uv.get() as usize)
        .expect("validated during construction");
    let height_uv = NonZeroUsize::new(data.height.get() / data.y_ratio_uv.get() as usize)
        .expect("validated during construction");
    let keeps_source_luma = matches!(self.kind, MaskKind::MotionColormap);

    // Vectors not usable: emit the scene-change frame and stop.
    if !vectors.is_usable(self.thscd1, self.thscd2) {
        let fill = self.scene_change_value as u8;
        if keeps_source_luma {
            copy_plane_rows(
                output.plane_mut(0)?,
                luma_stride,
                src.plane(0)?,
                src.pitch_for_plane(0)?,
                width,
                height,
            );
        } else {
            fill_plane_prefix(output.plane_mut(0)?, luma_stride, height, fill);
        }
        if has_chroma {
            fill_plane_prefix(
                output.plane_mut(1)?,
                pitch_for_plane(output_pitch, 1)?,
                height_uv,
                fill,
            );
            fill_plane_prefix(
                output.plane_mut(2)?,
                pitch_for_plane(output_pitch, 2)?,
                height_uv,
                fill,
            );
        }
        return Ok(());
    }

    let blk_x = data.blk_x;
    let blk_y = data.blk_y;
    let blk_count = blk_x.get() * blk_y.get();
    let pel = data.pel;
    let blk_step_x = data.blk_size_x.get() - data.overlap_x;
    let blk_step_y = data.blk_size_y.get() - data.overlap_y;
    let block_vector = |index: usize| vectors.get_block(0, index).vector;

    // One byte per block; the second mask is only produced for the colormap kind.
    let (small_mask, small_mask_v): (Vec<u8>, Option<Vec<u8>>) = match self.kind {
        MaskKind::VectorLength => (
            (0..blk_count)
                .map(|index| self.vector_length(block_vector(index), pel))
                .collect(),
            None,
        ),
        MaskKind::SadMask => {
            let block_area = (data.blk_size_x.get() * data.blk_size_y.get()) as f64;
            let sad_norm_factor = 4.0 * self.mask_norm_factor as f64 / block_area;
            let mask = self.make_sad_mask(
                vectors,
                blk_x,
                blk_y,
                sad_norm_factor,
                self.gamma as f64,
                pel,
                blk_x,
                self.time256,
                blk_step_x,
                blk_step_y,
                8,
            );
            (mask, None)
        }
        MaskKind::OcclusionMask => {
            let mask = self.make_vector_occlusion_mask(
                vectors,
                data.is_backward,
                blk_x,
                blk_y,
                1.0 / self.mask_norm_factor as f64,
                self.gamma as f64,
                pel,
                blk_x,
                self.time256,
                blk_step_x,
                blk_step_y,
            );
            (mask, None)
        }
        MaskKind::HorizontalMotion => (
            (0..blk_count)
                .map(|index| horizontal_component(block_vector(index), self.mask_norm_factor))
                .collect(),
            None,
        ),
        MaskKind::VerticalMotion => (
            (0..blk_count)
                .map(|index| vertical_component(block_vector(index), self.mask_norm_factor))
                .collect(),
            None,
        ),
        MaskKind::MotionColormap => {
            let (horizontal, vertical): (Vec<u8>, Vec<u8>) = (0..blk_count)
                .map(|index| {
                    let vector = block_vector(index);
                    (
                        horizontal_component(vector, self.mask_norm_factor),
                        vertical_component(vector, self.mask_norm_factor),
                    )
                })
                .unzip();
            (horizontal, Some(vertical))
        }
    };

    if keeps_source_luma {
        copy_plane_rows(
            output.plane_mut(0)?,
            luma_stride,
            src.plane(0)?,
            src.pitch_for_plane(0)?,
            width,
            height,
        );
    } else {
        let luma = output.plane_mut(0)?;
        self.upsizer
            .resize_u8(luma, luma_stride, &small_mask, blk_x, false);
        extend_resized_plane(
            luma,
            luma_stride,
            width,
            self.width_b,
            height,
            self.height_b,
        );
    }

    if has_chroma {
        let u_stride = pitch_for_plane(output_pitch, 1)?;
        let u_plane = output.plane_mut(1)?;
        self.upsizer_uv
            .resize_u8(u_plane, u_stride, &small_mask, blk_x, false);
        extend_resized_plane(
            u_plane,
            u_stride,
            width_uv,
            self.width_b_uv,
            height_uv,
            self.height_b_uv,
        );

        let v_stride = pitch_for_plane(output_pitch, 2)?;
        let v_plane = output.plane_mut(2)?;
        // Plane 2 uses the second mask when there is one, otherwise the first.
        let mask_v = small_mask_v.as_ref().unwrap_or(&small_mask);
        self.upsizer_uv
            .resize_u8(v_plane, v_stride, mask_v, blk_x, false);
        extend_resized_plane(
            v_plane,
            v_stride,
            width_uv,
            self.width_b_uv,
            height_uv,
            self.height_b_uv,
        );
    }
    Ok(())
}
/// Maps the squared length of `vector` (divided by `pel` squared) to a
/// byte: `255 * (norm * mask_norm_factor2) ^ half_gamma`, capped at 255.
#[inline]
fn vector_length(&self, vector: MotionVector, pel: Subpel) -> u8 {
    let squared_length = vector.x.pow(2) + vector.y.pow(2);
    let pel_squared = (pel as u32).pow(2);
    let norm_e = squared_length as f64 / pel_squared as f64;
    let scaled = norm_e * self.mask_norm_factor2 as f64;
    let length = 255.0 * scaled.powf(self.half_gamma as f64);
    if length > 255.0 {
        return 255;
    }
    length as u8
}
/// Builds a per-block mask from block SAD values.
///
/// For each block, its position is shifted by its own vector scaled with
/// `256 - time256`; the SAD of the block found at the shifted position
/// (or of the block itself when the shifted position falls outside the
/// block grid) is converted with `byte_norm` and stored at the block's
/// own position. Rows of the returned mask are `mask_pitch` bytes apart.
#[inline]
fn make_sad_mask(
    &self,
    fake_gop: &FakeGroupOfPlanes,
    blk_x: NonZeroUsize,
    blk_y: NonZeroUsize,
    sad_norm_factor: f64,
    gamma: f64,
    pel: Subpel,
    mask_pitch: NonZeroUsize,
    time256: i32,
    blk_step_x: usize,
    blk_step_y: usize,
    bits_per_sample: u8,
) -> Vec<u8> {
    let cols = blk_x.get();
    let rows = blk_y.get();
    let pitch = mask_pitch.get();
    let mut mask = vec![0; rows * pitch];
    let pel = pel as i32;
    let remaining = 256 - time256;
    let time4096_x = remaining * 16 / (blk_step_x as i32 * pel);
    let time4096_y = remaining * 16 / (blk_step_y as i32 * pel);
    // Row-major walk over the block grid; `index == bx + by * cols`.
    for index in 0..rows * cols {
        let bx = index % cols;
        let by = index / cols;
        let vector = fake_gop.get_block(0, index).vector;
        let shifted_x = bx as i32 - vector.x * time4096_x / 4096;
        let shifted_y = by as i32 - vector.y * time4096_y / 4096;
        let inside_grid =
            (0..cols as i32).contains(&shifted_x) && (0..rows as i32).contains(&shifted_y);
        let source_index = if inside_grid {
            shifted_x as usize + shifted_y as usize * cols
        } else {
            index
        };
        let sad = fake_gop.get_block(0, source_index).vector.sad >> (bits_per_sample - 8);
        *semisafe_get_mut(&mut mask, bx + by * pitch) = byte_norm(sad, sad_norm_factor, gamma);
    }
    mask
}
/// Builds a per-block occlusion mask from differences between neighbouring
/// block vectors.
///
/// For every block whose right (resp. lower) neighbour has a smaller x
/// (resp. y) vector component, the difference is treated as an occlusion
/// amount and written, via `byte_occ_mask`, over a run of blocks along
/// that axis. The run is shifted by the occlusion scaled with `time256`;
/// its direction depends on `is_backward`. Rows of the returned mask are
/// `mask_pitch` bytes apart.
///
/// The run bounds are kept signed: in the forward case the shifted end can
/// fall below the start (or below zero), which must yield an empty run
/// rather than wrapping to a huge unsigned index.
#[inline]
fn make_vector_occlusion_mask(
    &self,
    fake_gop: &FakeGroupOfPlanes,
    is_backward: bool,
    blk_x: NonZeroUsize,
    blk_y: NonZeroUsize,
    mask_norm_divider: f64,
    gamma: f64,
    pel: Subpel,
    mask_pitch: NonZeroUsize,
    time256: i32,
    blk_step_x: usize,
    blk_step_y: usize,
) -> Vec<u8> {
    /// Inclusive run of block coordinates to mark along one axis.
    ///
    /// `pos` is the current block coordinate, `last` the largest valid
    /// coordinate on that axis and `shift` the time-scaled occlusion. The
    /// start is never negative; the range is empty when the end precedes it.
    fn occluded_run(
        pos: usize,
        last: usize,
        shift: i32,
        is_backward: bool,
    ) -> std::ops::RangeInclusive<i32> {
        let pos = pos as i32;
        let shifted = pos + 1 - shift;
        if is_backward {
            max(0, shifted)..=pos + 1
        } else {
            pos..=min(shifted, last as i32)
        }
    }

    let cols = blk_x.get();
    let rows = blk_y.get();
    let pitch = mask_pitch.get();
    let mut mask = vec![0; rows * pitch];
    let pel = pel as i32;
    let time4096_x = time256 * 16 / (blk_step_x as i32 * pel);
    let time4096_y = time256 * 16 / (blk_step_y as i32 * pel);
    let occ_norm_x = 80.0 / (mask_norm_divider * blk_step_x as f64 * pel as f64);
    let occ_norm_y = 80.0 / (mask_norm_divider * blk_step_y as f64 * pel as f64);
    for by in 0..rows {
        for bx in 0..cols {
            let i = bx + by * cols;
            let block = fake_gop.get_block(0, i);
            let vx = block.vector.x;
            let vy = block.vector.y;
            // Right neighbour.
            if bx < cols - 1 {
                let right = fake_gop.get_block(0, i + 1);
                if right.vector.x < vx {
                    let occlusion = vx - right.vector.x;
                    let shift = occlusion * time4096_x / 4096;
                    for bxi in occluded_run(bx, cols - 1, shift, is_backward) {
                        // `bxi` is non-negative here, so the cast is lossless.
                        let mask_ptr = semisafe_get_mut(&mut mask, bxi as usize + by * pitch);
                        byte_occ_mask(mask_ptr, occlusion, occ_norm_x, gamma);
                    }
                }
            }
            // Lower neighbour.
            if by < rows - 1 {
                let below = fake_gop.get_block(0, i + cols);
                if below.vector.y < vy {
                    let occlusion = vy - below.vector.y;
                    let shift = occlusion * time4096_y / 4096;
                    for byi in occluded_run(by, rows - 1, shift, is_backward) {
                        // `byi` is non-negative here, so the cast is lossless.
                        let mask_ptr = semisafe_get_mut(&mut mask, bx + byi as usize * pitch);
                        byte_occ_mask(mask_ptr, occlusion, occ_norm_y, gamma);
                    }
                }
            }
        }
    }
    mask
}
}
/// Looks up the pitch for `plane` (0, 1 or 2) in `pitch`.
///
/// # Errors
///
/// Returns an error when `plane` is greater than 2 or when the tuple holds
/// no pitch for planes 1 or 2.
#[inline]
fn pitch_for_plane(pitch: PlaneSizeTuple, plane: usize) -> Result<NonZeroUsize> {
    let found = match plane {
        0 => Some(pitch.0),
        1 => pitch.1,
        2 => pitch.2,
        _ => None,
    };
    found.ok_or_else(|| anyhow!("requested plane {plane} is not available"))
}
/// Maps the x component of `vector` to a byte:
/// `x * mask_norm_factor * 100 + 128`, truncated and clamped to `0..=255`.
#[inline]
fn horizontal_component(vector: MotionVector, mask_norm_factor: f32) -> u8 {
    let normalized = vector.x as f32 * mask_norm_factor;
    let centered = normalized.fma(100.0, 128.0) as i32;
    centered.clamp(0, 255) as u8
}
/// Maps the y component of `vector` to a byte:
/// `y * mask_norm_factor * 100 + 128`, truncated and clamped to `0..=255`.
#[inline]
fn vertical_component(vector: MotionVector, mask_norm_factor: f32) -> u8 {
    let normalized = vector.y as f32 * mask_norm_factor;
    let centered = normalized.fma(100.0, 128.0) as i32;
    centered.clamp(0, 255) as u8
}
/// Copies `height` rows of `width` bytes from `src` to `dest`, honouring
/// each buffer's stride, and zeroes the rest of every destination row up
/// to `dest_stride`.
#[inline]
fn copy_plane_rows(
    dest: &mut [u8],
    dest_stride: NonZeroUsize,
    src: &[u8],
    src_stride: NonZeroUsize,
    width: NonZeroUsize,
    height: NonZeroUsize,
) {
    let row_width = width.get();
    let dst_pitch = dest_stride.get();
    let src_pitch = src_stride.get();
    let row_offsets = (0..height.get()).map(|row| (row * dst_pitch, row * src_pitch));
    for (dst_start, src_start) in row_offsets {
        let dst_end = dst_start + row_width;
        let pixels = semisafe_get(src, src_start..(src_start + row_width));
        semisafe_get_mut(dest, dst_start..dst_end).copy_from_slice(pixels);
        // Clear the padding between the visible width and the stride.
        semisafe_get_mut(dest, dst_end..(dst_start + dst_pitch)).fill(0);
    }
}
/// Sets the first `stride * height` bytes of `dest` to `value`.
#[inline]
fn fill_plane_prefix(dest: &mut [u8], stride: NonZeroUsize, height: NonZeroUsize, value: u8) {
    let byte_count = height.get() * stride.get();
    let prefix = semisafe_get_mut(dest, ..byte_count);
    for byte in prefix.iter_mut() {
        *byte = value;
    }
}
/// Pads a resized plane out to the full frame size.
///
/// The resizer only fills `width_b` x `height_b` samples. When the frame is
/// wider, columns `width_b..width` of each of the `height` rows are set to
/// that row's sample at column `width_b - 1`. When the frame is taller,
/// rows `height_b..height` each receive the first `width` bytes of row
/// `height_b - 1`.
#[inline]
fn extend_resized_plane(
    dest: &mut [u8],
    stride: NonZeroUsize,
    width: NonZeroUsize,
    width_b: NonZeroUsize,
    height: NonZeroUsize,
    height_b: NonZeroUsize,
) {
    let pitch = stride.get();
    if width > width_b {
        for h in 0..height.get() {
            let row_start = h * pitch;
            let edge = *semisafe_get(dest, row_start + width_b.get() - 1);
            semisafe_get_mut(dest, (row_start + width_b.get())..(row_start + width.get()))
                .fill(edge);
        }
    }
    if height > height_b {
        let (before, target) = semisafe_split_at_mut(dest, height_b.get() * pitch);
        // `last_row` holds exactly one row, so it is copied one row at a
        // time: a single multi-row blit would step past its end as soon as
        // more than one row is missing.
        let last_row = semisafe_get(before, ((height_b.get() - 1) * pitch)..);
        let one_row = NonZeroUsize::new(1).expect("1 is non-zero");
        for row in 0..(height.get() - height_b.get()) {
            vs_bitblt(
                semisafe_get_mut(target, (row * pitch)..),
                stride,
                last_row,
                stride,
                width,
                one_row,
            );
        }
    }
}
/// Raises `*mask_ptr` to the byte derived from `occlusion` when that byte
/// is larger than the current value.
///
/// The byte is `255 * occlusion * occ_norm` when `gamma` is exactly 1, and
/// `255 * (occlusion * occ_norm) ^ gamma` otherwise, truncated to an
/// integer and clamped to `0..=255`.
#[inline]
fn byte_occ_mask(mask_ptr: &mut u8, occlusion: i32, occ_norm: f64, gamma: f64) {
    let scaled = if gamma == 1.0 {
        255.0 * occlusion as f64 * occ_norm
    } else {
        255.0 * (occlusion as f64 * occ_norm).powf(gamma)
    };
    let candidate = (scaled as i32).clamp(0, 255) as u8;
    *mask_ptr = max(*mask_ptr, candidate);
}
/// Converts a SAD value to a byte: `255 * (sad * sad_norm_factor) ^ gamma`,
/// capped at 255 and truncated towards zero.
#[inline]
fn byte_norm(sad: i64, sad_norm_factor: f64, gamma: f64) -> u8 {
    let normalized = sad as f64 * sad_norm_factor;
    let length = 255.0 * normalized.powf(gamma);
    if length > 255.0 {
        return 255;
    }
    length as u8
}