zoomvtools 1.1.1

Video motion vector analysis utilities in pure Rust
Documentation
#[cfg(all(target_arch = "x86_64", feature = "avx2"))]
mod avx2;
mod rust;

#[cfg(test)]
mod tests;

use std::{
    cmp::{max, min},
    mem::MaybeUninit,
    num::NonZeroUsize,
};

use crate::{
    params::Subpel,
    util::{assume_init_vec, uninit_vec},
};
use semisafe::slice::get_mut as semisafe_get_mut;

/// Number of fractional bits used by the fixed-point interpolation weights.
pub(crate) const SIMPLE_RESIZE_WEIGHT_SHIFT: i32 = 14;
/// Fixed-point value of a weight of 1.0 (`1 << SIMPLE_RESIZE_WEIGHT_SHIFT`);
/// the weight tables store `weight * SIMPLE_RESIZE_WEIGHT_MAX` truncated to `i32`.
pub(crate) const SIMPLE_RESIZE_WEIGHT_MAX: i32 = 1 << SIMPLE_RESIZE_WEIGHT_SHIFT;
/// Half of [`SIMPLE_RESIZE_WEIGHT_MAX`].
// NOTE(review): presumably the rounding term added before shifting the result
// back down in the backends — confirm against `rust`/`avx2`.
pub(crate) const SIMPLE_RESIZE_WEIGHT_HALF: i32 = SIMPLE_RESIZE_WEIGHT_MAX / 2;

/// Resizer for 2-D `u8` / `i16` sample buffers with precomputed lookup tables.
///
/// The per-row and per-column source offsets and fixed-point weights are
/// computed once in [`SimpleResize::new`]; the actual resampling is delegated
/// to a backend function pointer chosen at construction time.
pub struct SimpleResize {
    /// Width of the output, in samples (one horizontal table entry per column).
    dest_width: NonZeroUsize,
    /// Height of the output, in rows (one vertical table entry per row).
    dest_height: NonZeroUsize,
    /// Width of the input, in samples. The input height is only needed while
    /// building the vertical tables and is not stored.
    src_width: NonZeroUsize,
    // Used only to limit the vectors in the 16 bit resizer.
    // dest_width and dest_height are usually the padded dimensions.
    // The two below are the unpadded dimensions, i.e. the actual frame size.
    limit_width: NonZeroUsize,
    limit_height: NonZeroUsize,
    /// Sub-pixel precision setting handed to the backends.
    // NOTE(review): how `pel` is applied is not visible in this file — see the backends.
    pel: Subpel,

    /// For each output row, the index of the first source row to sample.
    vertical_offsets: Box<[usize]>,
    /// For each output row, the fixed-point weight (scaled by
    /// `SIMPLE_RESIZE_WEIGHT_MAX`) paired with `vertical_offsets`.
    vertical_weights: Box<[i32]>,
    /// For each output column, the index of the first source column to sample.
    horizontal_offsets: Box<[usize]>,
    /// `horizontal_offsets` converted to `i32` for the AVX2 backend.
    #[cfg(all(target_arch = "x86_64", feature = "avx2"))]
    horizontal_offsets_avx2: Box<[i32]>,
    /// For each output column, the fixed-point weight paired with
    /// `horizontal_offsets`.
    horizontal_weights: Box<[i32]>,
    /// Horizontal weights packed for the AVX2 backend as
    /// `(weight << 16) | (SIMPLE_RESIZE_WEIGHT_MAX - weight)`.
    #[cfg(all(target_arch = "x86_64", feature = "avx2"))]
    horizontal_weights_avx2: Box<[i32]>,

    /// Backend used by the `u8` entry points (scalar, or AVX2 when available).
    resize_u8_fn: SimpleResizeFn,
    /// Backend used by the `i16` entry points (scalar, or AVX2 when available).
    resize_i16_fn: SimpleResizeFn,
}

impl SimpleResize {
    /// Builds a resizer mapping a `src_width` x `src_height` input onto a
    /// `dest_width` x `dest_height` output.
    ///
    /// The offset/weight tables for both axes are computed up front, and the
    /// `u8` and `i16` backends are selected once: the scalar implementation by
    /// default, or the AVX2 one when the crate is built with the `avx2` feature
    /// on x86_64 and the CPU reports AVX2 support.
    ///
    /// `limit_width`, `limit_height` and `pel` are stored unchanged for use by
    /// the backends.
    ///
    /// # Panics
    ///
    /// With the AVX2 tables enabled, panics if a horizontal offset does not fit
    /// in an `i32`.
    #[must_use]
    #[inline]
    pub fn new(
        dest_width: NonZeroUsize,
        dest_height: NonZeroUsize,
        src_width: NonZeroUsize,
        src_height: NonZeroUsize,
        limit_width: NonZeroUsize,
        limit_height: NonZeroUsize,
        pel: Subpel,
    ) -> Self {
        let (vertical_offsets, vertical_weights) = Self::init_tables(dest_height, src_height);
        let (horizontal_offsets, horizontal_weights) = Self::init_tables(dest_width, src_width);

        // The AVX2 backend wants 32-bit offsets.
        #[cfg(all(target_arch = "x86_64", feature = "avx2"))]
        let horizontal_offsets_avx2 = horizontal_offsets
            .iter()
            .copied()
            .map(|offset| i32::try_from(offset).expect("resize offset fits in i32"))
            .collect();

        // Pack each weight `w` with its complement `MAX - w` into one 32-bit lane.
        #[cfg(all(target_arch = "x86_64", feature = "avx2"))]
        let horizontal_weights_avx2 = horizontal_weights
            .iter()
            .map(|&w| (w << 16) | (SIMPLE_RESIZE_WEIGHT_MAX - w))
            .collect();

        // Start from the portable backends and upgrade below when possible.
        #[allow(unused_mut, reason = "not mutated without avx2")]
        let mut resize_u8_fn = rust::simple_resize::<u8> as SimpleResizeFn;
        #[allow(unused_mut, reason = "not mutated without avx2")]
        let mut resize_i16_fn = rust::simple_resize::<i16> as SimpleResizeFn;

        #[cfg(all(target_arch = "x86_64", feature = "avx2"))]
        if crate::util::has_avx2() {
            // PERF: 5-15% faster than scalar on test machine
            resize_u8_fn = avx2::simple_resize_u8;
            // PERF: 40-90% faster than scalar on test machine
            resize_i16_fn = avx2::simple_resize_i16;
        }

        Self {
            dest_width,
            dest_height,
            src_width,
            limit_width,
            limit_height,
            pel,
            vertical_offsets,
            vertical_weights,
            horizontal_offsets,
            #[cfg(all(target_arch = "x86_64", feature = "avx2"))]
            horizontal_offsets_avx2,
            horizontal_weights,
            #[cfg(all(target_arch = "x86_64", feature = "avx2"))]
            horizontal_weights_avx2,
            resize_u8_fn,
            resize_i16_fn,
        }
    }

    /// Computes, for one axis, the source offset and fixed-point weight of
    /// every output sample.
    ///
    /// Output sample `i` is centred at `(i + 0.5) * in_size / out_size` in
    /// source coordinates. Positions at or before the centre of the first
    /// source sample get weight `0.0` at the first index; positions at or past
    /// the centre of the last source sample get weight `1.0` at the
    /// second-to-last index; everything in between gets the integer part of
    /// `position - 0.5` as offset and the fractional part as weight.
    ///
    /// Returns `(offsets, weights)`, each `out_size` long, with weights scaled
    /// by `SIMPLE_RESIZE_WEIGHT_MAX` and truncated to `i32`.
    // NOTE(review): the weight presumably applies to the sample at
    // `offset + 1` (consistent with the packed AVX2 weights) — confirm in the
    // backends.
    fn init_tables(out_size: NonZeroUsize, in_size: NonZeroUsize) -> (Box<[usize]>, Box<[i32]>) {
        let out_size = out_size.get();
        let in_size = in_size.get();

        // Centres of the first and last source samples.
        let left_most = 0.5f32;
        let right_most = in_size as f32 - 0.5;

        let leftmost_idx = max(left_most as usize, 0);
        let rightmost_idx = min(right_most as usize, in_size - 1);

        let mut offsets = Vec::with_capacity(out_size);
        let mut weights = Vec::with_capacity(out_size);
        for i in 0..out_size {
            let position = (i as f32 + 0.5) * in_size as f32 / out_size as f32;
            let (offset, weight) = if position <= left_most {
                (leftmost_idx, 0.0f32)
            } else if position >= right_most {
                match rightmost_idx.checked_sub(1) {
                    Some(offset) => (offset, 1.0f32),
                    // Single-sample input: there is no second-to-last index, so
                    // stay on the only sample with zero weight instead of
                    // underflowing `rightmost_idx - 1`.
                    None => (rightmost_idx, 0.0f32),
                }
            } else {
                let offset = (position - left_most) as usize;
                (offset, position - left_most - offset as f32)
            };
            offsets.push(offset);
            weights.push((weight * SIMPLE_RESIZE_WEIGHT_MAX as f32) as i32);
        }

        (offsets.into_boxed_slice(), weights.into_boxed_slice())
    }

    /// Fills the per-row padding of `dest` with `value`.
    ///
    /// For each of the `dest_height` rows, every element from column
    /// `dest_width` up to (but excluding) `dest_stride_pixels` is written.
    /// Elements inside the active `dest_width` columns are left untouched.
    ///
    /// In debug builds this asserts that the stride is at least `dest_width`
    /// and that `dest` holds at least `stride * dest_height` elements.
    fn init_stride_padding<T: Copy>(
        &self,
        dest: &mut [MaybeUninit<T>],
        dest_stride_pixels: NonZeroUsize,
        value: T,
    ) {
        let row_stride = dest_stride_pixels.get();
        let active_width = self.dest_width.get();
        let rows = self.dest_height.get();

        debug_assert!(
            row_stride >= active_width,
            "destination stride must be at least resize width"
        );
        debug_assert!(
            dest.len() >= row_stride * rows,
            "destination buffer must cover full resize area"
        );

        // Nothing to fill when rows are tightly packed.
        if row_stride <= active_width {
            return;
        }

        for row_start in (0..rows).map(|row| row * row_stride) {
            for column in active_width..row_stride {
                semisafe_get_mut(dest, row_start + column).write(value);
            }
        }
    }

    /// Resizes the `u8` buffer `src` into `dest` using the backend selected in
    /// [`SimpleResize::new`].
    ///
    /// Both strides are given in pixels; `horizontal_vectors` is forwarded to
    /// the backend unchanged.
    // NOTE(review): the slice lengths are not validated here; the backend
    // presumably expects both buffers to cover the full source/destination
    // area for the given strides — confirm against the backends.
    #[cfg_attr(
        feature = "tracing",
        tracing::instrument(skip_all, name = "resize::resize_u8", fields(horizontal_vectors))
    )]
    #[inline]
    pub fn resize_u8(
        &self,
        dest: &mut [u8],
        dest_stride_pixels: NonZeroUsize,
        src: &[u8],
        src_stride_pixels: NonZeroUsize,
        horizontal_vectors: bool,
    ) {
        let backend = self.resize_u8_fn;
        let dest_ptr = dest.as_mut_ptr();
        let src_ptr = src.as_ptr();

        // SAFETY: the backend was chosen at construction time (the SIMD variant
        // only after a CPU feature check) and both pointers come from live
        // slices. A `u8` sample is one byte wide, so the pixel strides double
        // as the byte strides the backend expects.
        unsafe {
            backend(
                self,
                dest_ptr,
                dest_stride_pixels,
                src_ptr,
                src_stride_pixels,
                horizontal_vectors,
            );
        }
    }

    /// Resizes the `u8` buffer `src` into a freshly allocated vector of
    /// `dest_stride_pixels * dest_height` samples.
    ///
    /// The row padding (columns `dest_width..dest_stride_pixels`) is zeroed;
    /// the active columns are produced by the backend selected in
    /// [`SimpleResize::new`]. Both strides are given in pixels and
    /// `horizontal_vectors` is forwarded to the backend unchanged.
    #[must_use]
    #[cfg_attr(
        feature = "tracing",
        tracing::instrument(
            skip_all,
            name = "resize::resize_u8_to_vec",
            fields(horizontal_vectors)
        )
    )]
    #[inline]
    pub fn resize_u8_to_vec(
        &self,
        dest_stride_pixels: NonZeroUsize,
        src: &[u8],
        src_stride_pixels: NonZeroUsize,
        horizontal_vectors: bool,
    ) -> Vec<u8> {
        let mut dest = uninit_vec(dest_stride_pixels.get() * self.dest_height.get());
        self.init_stride_padding(&mut dest, dest_stride_pixels, 0u8);

        // The backend is invoked through a raw pointer instead of via
        // `resize_u8`: forming a `&mut [u8]` over the still-uninitialized
        // buffer would violate the `MaybeUninit` contract.
        //
        // SAFETY: the backend was chosen at construction time (the SIMD variant
        // only after a CPU feature check). `dest` was allocated above with
        // `dest_stride_pixels * dest_height` one-byte samples and is only
        // written through this pointer; `src` is a live slice.
        // NOTE(review): the length of `src` is not validated here.
        unsafe {
            (self.resize_u8_fn)(
                self,
                dest.as_mut_ptr().cast::<u8>(),
                dest_stride_pixels,
                src.as_ptr(),
                src_stride_pixels,
                horizontal_vectors,
            );
        }
        // SAFETY: selected backend writes every active pixel; row padding was initialized above.
        unsafe { assume_init_vec(dest) }
    }

    /// Resizes the `i16` buffer `src` into `dest` using the backend selected in
    /// [`SimpleResize::new`].
    ///
    /// Both strides are given in pixels and converted to byte strides before
    /// being handed to the backend; `horizontal_vectors` is forwarded
    /// unchanged.
    ///
    /// # Panics
    ///
    /// Panics if a stride expressed in bytes does not fit in `usize`.
    // NOTE(review): the slice lengths are not validated here; the backend
    // presumably expects both buffers to cover the full source/destination
    // area for the given strides — confirm against the backends.
    #[allow(
        dead_code,
        reason = "used by tests and retained for existing direct-buffer callers"
    )]
    #[cfg_attr(
        feature = "tracing",
        tracing::instrument(skip_all, name = "resize::resize_i16", fields(horizontal_vectors))
    )]
    #[inline]
    pub fn resize_i16(
        &self,
        dest: &mut [i16],
        dest_stride_pixels: NonZeroUsize,
        src: &[i16],
        src_stride_pixels: NonZeroUsize,
        horizontal_vectors: bool,
    ) {
        /// Converts a stride in `i16` samples to a stride in bytes.
        ///
        /// A checked multiplication is used so that an overflowing stride
        /// panics instead of wrapping (possibly to zero, which would break the
        /// `NonZeroUsize` invariant).
        fn stride_in_bytes(stride_pixels: NonZeroUsize) -> NonZeroUsize {
            stride_pixels
                .get()
                .checked_mul(size_of::<i16>())
                .and_then(NonZeroUsize::new)
                .expect("i16 stride in bytes must fit in usize")
        }

        let dest_stride_bytes = stride_in_bytes(dest_stride_pixels);
        let src_stride_bytes = stride_in_bytes(src_stride_pixels);

        // SAFETY: the backend was chosen at construction time (the SIMD variant
        // only after a CPU feature check); both pointers come from live slices
        // and the strides were converted to bytes above.
        unsafe {
            (self.resize_i16_fn)(
                self,
                dest.as_mut_ptr().cast(),
                dest_stride_bytes,
                src.as_ptr().cast(),
                src_stride_bytes,
                horizontal_vectors,
            );
        }
    }

    /// Resizes the `i16` buffer `src` into a freshly allocated vector of
    /// `dest_stride_pixels * dest_height` samples.
    ///
    /// The row padding (columns `dest_width..dest_stride_pixels`) is zeroed;
    /// the active columns are produced by the backend selected in
    /// [`SimpleResize::new`]. Both strides are given in pixels and
    /// `horizontal_vectors` is forwarded to the backend unchanged.
    ///
    /// # Panics
    ///
    /// Panics if a stride expressed in bytes does not fit in `usize`.
    #[must_use]
    #[cfg_attr(
        feature = "tracing",
        tracing::instrument(
            skip_all,
            name = "resize::resize_i16_to_vec",
            fields(horizontal_vectors)
        )
    )]
    #[inline]
    pub fn resize_i16_to_vec(
        &self,
        dest_stride_pixels: NonZeroUsize,
        src: &[i16],
        src_stride_pixels: NonZeroUsize,
        horizontal_vectors: bool,
    ) -> Vec<i16> {
        /// Converts a stride in `i16` samples to a stride in bytes, panicking
        /// on overflow rather than wrapping.
        fn stride_in_bytes(stride_pixels: NonZeroUsize) -> NonZeroUsize {
            stride_pixels
                .get()
                .checked_mul(size_of::<i16>())
                .and_then(NonZeroUsize::new)
                .expect("i16 stride in bytes must fit in usize")
        }

        let dest_stride_bytes = stride_in_bytes(dest_stride_pixels);
        let src_stride_bytes = stride_in_bytes(src_stride_pixels);

        let mut dest = uninit_vec(dest_stride_pixels.get() * self.dest_height.get());
        self.init_stride_padding(&mut dest, dest_stride_pixels, 0i16);

        // The backend is invoked through a raw pointer instead of via
        // `resize_i16`: forming a `&mut [i16]` over the still-uninitialized
        // buffer would violate the `MaybeUninit` contract.
        //
        // SAFETY: the backend was chosen at construction time (the SIMD variant
        // only after a CPU feature check). `dest` was allocated above with
        // `dest_stride_pixels * dest_height` `i16` samples and is only written
        // through this pointer; `src` is a live slice; strides are in bytes.
        // NOTE(review): the length of `src` is not validated here.
        unsafe {
            (self.resize_i16_fn)(
                self,
                dest.as_mut_ptr().cast::<u8>(),
                dest_stride_bytes,
                src.as_ptr().cast::<u8>(),
                src_stride_bytes,
                horizontal_vectors,
            );
        }
        // SAFETY: selected backend writes every active pixel; row padding was initialized above.
        unsafe { assume_init_vec(dest) }
    }
}

/// Signature shared by every resize backend (scalar and AVX2, `u8` and `i16`).
///
/// Buffers are passed as untyped byte pointers and strides are expressed in
/// bytes, so one function-pointer type serves both sample widths; the public
/// wrappers on [`SimpleResize`] perform the pointer casts and stride
/// conversion.
///
/// # Safety
///
/// The pointers are not bounds-checked by this type.
// NOTE(review): the exact extent each backend reads and writes is defined in
// the `rust` / `avx2` modules, which are not visible here — confirm there.
pub(crate) type SimpleResizeFn = unsafe fn(
    resizer: &SimpleResize,
    dest: *mut u8,
    dest_stride_bytes: NonZeroUsize,
    src: *const u8,
    src_stride_bytes: NonZeroUsize,
    horizontal_vectors: bool,
);