//! `video_resize/resize/mod.rs` — separable image resampling using
//! Q14 fixed-point filter banks.
1pub mod algorithms;
2
3use anyhow::Result;
4use std::{
5    mem::align_of,
6    num::{NonZeroU8, NonZeroUsize},
7};
8use v_frame::{
9    chroma::ChromaSubsampling,
10    frame::{Frame, FrameBuilder},
11    pixel::Pixel,
12};
13
14use crate::util::{ceil_n, round_halfup};
15
/// Decides whether resizing horizontally before vertically is cheaper.
///
/// Models the per-pixel cost of the two pass orderings: each first pass is
/// weighted by 2, the second pass by the scale ratio of the first (since the
/// intermediate image has already grown or shrunk in that dimension).
/// Ratios are clamped to at least 1.0 where the pass reads the full extent.
pub fn should_resize_horiz_first(width_ratio: f32, height_ratio: f32) -> bool {
    let w = width_ratio.max(1.0);
    let h = height_ratio.max(1.0);

    // Multiplying by 2.0 is exact in binary floating point, so these plain
    // expressions round identically to the fused mul_add form.
    let cost_horiz_first = 2.0 * w + width_ratio * h;
    let cost_vert_first = 2.0 * (height_ratio * w) + h;

    cost_horiz_first < cost_vert_first
}
24
/// Resizes every plane of `input` horizontally to `dest_width`, keeping the
/// original height (chroma plane widths follow the frame's subsampling via
/// `FrameBuilder`).
///
/// The kernel `F` is converted to Q14 fixed-point coefficient rows by
/// `compute_filter`; each output pixel is a dot product of one coefficient
/// row with a window of input pixels, computed in a signed intermediate
/// domain (see `unpack_pixel_u16` / `pack_pixel_u16`).
///
/// # Errors
///
/// Returns an error if the destination frame fails to build.
pub fn resize_horizontal<T: Pixel, F: ResizeAlgorithm>(
    input: &Frame<T>,
    dest_width: NonZeroUsize,
    bit_depth: NonZeroU8,
) -> Result<Frame<T>> {
    // Upper clamp bound for packed pixels at this bit depth.
    let pixel_max = (1i32 << bit_depth.get()) - 1_i32;

    let mut output: Frame<T> = FrameBuilder::new(
        dest_width,
        input.y_plane.height(),
        input.subsampling,
        bit_depth,
    )
    .build()?;
    // Monochrome frames only carry the luma plane; otherwise process Y, U, V.
    for p in 0..(if input.subsampling == ChromaSubsampling::Monochrome {
        1
    } else {
        3
    }) {
        let input_plane = input.plane(p).expect("has plane");
        let output_plane = output.plane_mut(p).expect("has plane");
        let src_width = input_plane.width();
        let dest_width = output_plane.width();
        // Per-plane filter: chroma planes may have different widths than luma.
        let filter = compute_filter::<F>(src_width, dest_width, src_width);

        for (in_row, out_row) in input_plane.rows().zip(output_plane.rows_mut()) {
            // SAFETY: We control the size and bounds
            unsafe {
                #[allow(clippy::needless_range_loop)]
                for j in 0..dest_width.get() {
                    // First input column covered by output column `j`'s window.
                    let top = *filter.left.get_unchecked(j);
                    let mut accum = 0i32;

                    // Q14 dot product of coefficient row `j` with the window.
                    for k in 0..filter.filter_width {
                        let coeff =
                            i32::from(*filter.data_i16.get_unchecked(j * filter.stride_i16 + k));
                        let x = unpack_pixel_u16(in_row.get_unchecked(top + k).to_u16().unwrap());
                        accum += coeff * x;
                    }

                    // Round off the fractional bits, unbias, clamp, and narrow
                    // back to the frame's pixel type.
                    *out_row.get_unchecked_mut(j) = match size_of::<T>() {
                        1 => T::from(pack_pixel_u16(accum, pixel_max) as u8).expect("T is u8"),
                        2 => T::from(pack_pixel_u16(accum, pixel_max)).expect("T is u16"),
                        _ => unreachable!(),
                    };
                }
            }
        }
    }
    Ok(output)
}
76
/// Resizes every plane of `input` vertically to `dest_height`, keeping the
/// original width (chroma plane heights follow the frame's subsampling via
/// `FrameBuilder`).
///
/// Mirrors `resize_horizontal`, but the filter window runs down a column,
/// so the loop works on raw (origin-offset) plane data and indexes across
/// row strides instead of using the per-row iterators.
///
/// # Errors
///
/// Returns an error if the destination frame fails to build.
pub fn resize_vertical<T: Pixel, F: ResizeAlgorithm>(
    input: &Frame<T>,
    dest_height: NonZeroUsize,
    bit_depth: NonZeroU8,
) -> Result<Frame<T>> {
    // Upper clamp bound for packed pixels at this bit depth.
    let pixel_max = (1i32 << bit_depth.get()) - 1_i32;

    let mut output: Frame<T> = FrameBuilder::new(
        input.y_plane.width(),
        dest_height,
        input.subsampling,
        bit_depth,
    )
    .build()?;
    // Monochrome frames only carry the luma plane; otherwise process Y, U, V.
    for p in 0..(if input.subsampling == ChromaSubsampling::Monochrome {
        1
    } else {
        3
    }) {
        let input_plane = input.plane(p).expect("plane exists");
        let output_plane = output.plane_mut(p).expect("plane exists");
        let src_height = input_plane.height();
        let dest_height = output_plane.height();
        let src_width = input_plane.width();
        let src_stride = input_plane.geometry().stride;
        let dest_stride = output_plane.geometry().stride;
        // Raw plane data starting at the visible origin: the vertical window
        // crosses row boundaries, so row iterators don't fit here.
        let input_data = &input_plane.data()[input_plane.data_origin()..];
        let out_origin = output_plane.data_origin();
        let output_data = &mut output_plane.data_mut()[out_origin..];
        // Per-plane filter: chroma planes may have different heights than luma.
        let filter = compute_filter::<F>(src_height, dest_height, src_height);

        for i in 0..dest_height.get() {
            // SAFETY: We control the size and bounds
            unsafe {
                // Coefficient row for output row `i`; the same coefficients
                // apply to every column of this row.
                let filter_coeffs = filter.data_i16.as_ptr().add(i * filter.stride_i16);
                // First input row covered by output row `i`'s window.
                let top = *filter.left.get_unchecked(i);

                for j in 0..src_width.get() {
                    let mut accum = 0i32;

                    // Q14 dot product down column `j`.
                    for k in 0..filter.filter_width {
                        let coeff = i32::from(*filter_coeffs.add(k));
                        let x = unpack_pixel_u16(
                            input_data
                                .get_unchecked((top + k) * src_stride.get() + j)
                                .to_u16()
                                .unwrap(),
                        );
                        accum += coeff * x;
                    }

                    // Round off the fractional bits, unbias, clamp, and narrow
                    // back to the frame's pixel type.
                    *output_data.get_unchecked_mut(i * dest_stride.get() + j) = match size_of::<T>()
                    {
                        1 => T::from(pack_pixel_u16(accum, pixel_max) as u8).expect("T is u8"),
                        2 => T::from(pack_pixel_u16(accum, pixel_max)).expect("T is u16"),
                        _ => unreachable!(),
                    };
                }
            }
        }
    }
    Ok(output)
}
140
/// Converts an unsigned 16-bit pixel into the signed intermediate domain
/// used by the fixed-point filter: `[0, 65535]` maps to `[-32768, 32767]`.
#[inline(always)]
fn unpack_pixel_u16(x: u16) -> i32 {
    // Subtracting 0x8000 is the same bias as adding `i16::MIN`.
    i32::from(x) - 0x8000
}
145
/// Converts a signed Q14 fixed-point accumulator back to an unsigned pixel:
/// rounds off the 14 fractional bits, removes the signed bias applied by
/// `unpack_pixel_u16`, and clamps into `[0, pixel_max]`.
#[inline(always)]
fn pack_pixel_u16(x: i32, pixel_max: i32) -> u16 {
    // Add half (1 << 13) before the arithmetic shift to round-to-nearest.
    let rounded = (x + 0x2000) >> 14;
    // Undo the -0x8000 bias of the signed intermediate domain.
    let unbiased = rounded + 0x8000;
    // Clamp: upper bound applied before the lower, so 0 wins on conflict.
    let limited = if unbiased > pixel_max { pixel_max } else { unbiased };
    limited.max(0) as u16
}
153
/// Specifies the target resolution for the resized image.
#[derive(Debug, Clone, Copy)]
pub struct ResizeDimensions {
    /// Target width in pixels.
    pub width: NonZeroUsize,
    /// Target height in pixels.
    pub height: NonZeroUsize,
}
160
/// A resampling kernel usable by the resize passes.
pub trait ResizeAlgorithm {
    /// Kernel radius in samples at unit scale; `compute_filter` divides this
    /// by the scale step to widen the window when downscaling.
    fn support() -> u32;
    /// Constructs the kernel.
    fn new() -> Self;
    /// Evaluates the kernel weight at distance `x` from the sample center.
    fn process(&self, x: f64) -> f64;
}
166
/// A precomputed resize filter bank: one row of fixed-point coefficients per
/// output sample, plus each row's starting offset into the input.
struct FilterContext {
    /// Number of meaningful coefficients (taps) per row.
    filter_width: usize,
    // TODO: Enable these fields if v_frame ever supports f32 types
    // filter_rows: usize,
    // input_width: usize,
    // stride: usize,
    // data: Box<[f32]>,
    /// Row stride (in elements) of `data_i16`, rounded up for u16 alignment.
    stride_i16: usize,
    /// Coefficient matrix in Q14 fixed point, `stride_i16` elements per row.
    data_i16: Box<[i16]>,
    /// For each output sample, the first input index its coefficients apply to.
    left: Box<[usize]>,
}
178
179fn compute_filter<F: ResizeAlgorithm>(
180    src_dim: NonZeroUsize,
181    dest_dim: NonZeroUsize,
182    width: NonZeroUsize,
183) -> FilterContext {
184    let scale = dest_dim.get() as f64 / width.get() as f64;
185    let step = scale.min(1.0);
186    let support = f64::from(F::support()) / step;
187    let filter_size = (support.ceil() as usize * 2).max(1);
188    let f = F::new();
189    // This represents a row-major matrix with dest_dim rows and src_dim cols
190    //
191    // TODO: We should be able to represent this as a compressed sparse matrix
192    // to reduce memory usage.
193    let mut m: Vec<f64> = vec![0.0_f64; dest_dim.get() * src_dim.get()];
194
195    let src_dim_f = src_dim.get() as f64;
196    for i in 0..dest_dim.get() {
197        // Position of output sample on input grid.
198        let pos = (i as f64 + 0.5_f64) / scale;
199        let begin_pos = round_halfup((filter_size as f64).mul_add(-0.5, pos)) + 0.5_f64;
200
201        let mut total = 0.0_f64;
202        for j in 0..filter_size {
203            let xpos = begin_pos + j as f64;
204            total += f.process((xpos - pos) * step);
205        }
206
207        let mut left = usize::MAX;
208
209        for j in 0..filter_size {
210            let xpos = begin_pos + j as f64;
211
212            // Mirror the position if it goes beyond image bounds.
213            let real_pos = if xpos < 0.0_f64 {
214                -xpos
215            } else if xpos >= src_dim_f {
216                2.0f64.mul_add(src_dim_f, -xpos)
217            } else {
218                xpos
219            };
220
221            // Clamp the position if it is still out of bounds.
222            let real_pos = real_pos.max(0.0);
223
224            let idx = (real_pos.floor() as usize).min(src_dim.get() - 1);
225            // SAFETY: We control the size and bounds
226            unsafe {
227                *m.get_unchecked_mut(i * src_dim.get() + idx) +=
228                    f.process((xpos - pos) * step) / total;
229            }
230            left = left.min(idx);
231        }
232    }
233
234    matrix_to_filter(&m, src_dim)
235}
236
237fn matrix_to_filter(m: &[f64], input_width: NonZeroUsize) -> FilterContext {
238    assert!(!m.is_empty());
239
240    let height = m.len() / input_width;
241    let width = m.chunks_exact(input_width.get()).fold(0, |max, row| {
242        let mut first = None;
243        let mut last = None;
244        for (idx, val) in row.iter().enumerate() {
245            // We want to find the first and last index that have a non-zero value.
246            if first.is_none() {
247                if *val == 0.0_f64 {
248                    continue;
249                }
250                first = Some(idx);
251            }
252            if *val == 0.0_f64 {
253                // This is the end of the non-sparse values.
254                break;
255            }
256            last = Some(idx);
257        }
258        let width = last.unwrap() + 1 - first.unwrap();
259        max.max(width)
260    });
261    // TODO: Enable this code if v_frame ever supports f32 types
262    // let stride = ceil_n(width, align_of::<f32>());
263    let stride_i16 = ceil_n(width, align_of::<u16>());
264    let mut e = FilterContext {
265        filter_width: width,
266        // TODO: Enable these fields if v_frame ever supports f32 types
267        // filter_rows: height,
268        // input_width: m.ncols(),
269        // stride,
270        // data: vec![0.0; stride * height].into_boxed_slice(),
271        stride_i16,
272        data_i16: vec![0; stride_i16 * height].into_boxed_slice(),
273        left: vec![0; height].into_boxed_slice(),
274    };
275
276    for (i, row) in m.chunks_exact(input_width.get()).enumerate() {
277        let left = row
278            .iter()
279            .position(|val| *val != 0.0_f64)
280            .unwrap()
281            .min(row.len() - width);
282        let mut f32_err = 0.0_f64;
283        let mut i16_err = 0.0_f64;
284        let mut f32_sum = 0.0_f64;
285        let mut i16_sum = 0_i16;
286        let mut i16_greatest = 0_i16;
287        let mut i16_greatest_idx = 0usize;
288
289        // Dither filter coefficients when rounding them to their storage format.
290        // This minimizes accumulation of error and ensures that the filter
291        // continues to sum as close to 1.0 as possible after rounding.
292        for j in 0..width {
293            // SAFETY: We control the size and bounds
294            unsafe {
295                let coeff = *row.get_unchecked(left + j);
296
297                let coeff_expected_f32 = coeff - f32_err;
298                let coeff_expected_i16 = coeff.mul_add(f64::from(1i16 << 14usize), -i16_err);
299
300                let coeff_f32 = coeff_expected_f32 as f32;
301                let coeff_i16 = coeff_expected_i16.round() as i16;
302
303                #[allow(clippy::unnecessary_cast)]
304                {
305                    f32_err = coeff_expected_f32 as f64 - coeff_expected_f32;
306                    i16_err = coeff_expected_i16 as f64 - coeff_expected_i16;
307                }
308
309                if coeff_i16.abs() > i16_greatest {
310                    i16_greatest = coeff_i16;
311                    i16_greatest_idx = j;
312                }
313
314                f32_sum += f64::from(coeff_f32);
315                i16_sum += coeff_i16;
316
317                // TODO: Enable this code if v_frame ever supports f32 types
318                // *e.data.get_unchecked_mut(i * stride + j) = coeff_f32;
319                *e.data_i16.get_unchecked_mut(i * stride_i16 + j) = coeff_i16;
320            }
321        }
322
323        /* The final sum may still be off by a few ULP. This can not be fixed for
324         * floating point data, since the error is dependent on summation order,
325         * but for integer data, the error can be added to the greatest coefficient.
326         */
327        debug_assert!(
328            1.0_f64 - f32_sum <= f64::from(f32::EPSILON),
329            "error too great"
330        );
331        debug_assert!((1i16 << 14usize) - i16_sum <= 1, "error too great");
332
333        // SAFETY: We control the size and bounds
334        unsafe {
335            *e.data_i16
336                .get_unchecked_mut(i * e.stride_i16 + i16_greatest_idx) +=
337                (1i16 << 14usize) - i16_sum;
338            *e.left.get_unchecked_mut(i) = left;
339        }
340    }
341
342    e
343}