// video_resize/resize/mod.rs

1pub mod algorithms;
2
3use std::mem::align_of;
4use v_frame::{
5    frame::Frame,
6    prelude::{ChromaSampling, Pixel},
7};
8
9use crate::util::{ceil_n, get_chroma_sampling, round_halfup};
10
/// Decides which pass of a separable resize should run first.
///
/// Estimates the arithmetic cost of resizing horizontally-then-vertically
/// versus vertically-then-horizontally from the two scale ratios, and returns
/// `true` when the horizontal-first order is strictly cheaper.
pub fn should_resize_horiz_first(width_ratio: f32, height_ratio: f32) -> bool {
    // Ratios below 1.0 (downscale) are clamped to 1.0 in the cost model.
    let w_clamped = width_ratio.max(1.0);
    let h_clamped = height_ratio.max(1.0);

    // Cost of running the horizontal pass before the vertical pass.
    let cost_horiz_first = w_clamped.mul_add(2.0, width_ratio * h_clamped);
    // Cost of the opposite order.
    let cost_vert_first = (height_ratio * w_clamped).mul_add(2.0, h_clamped);

    cost_horiz_first < cost_vert_first
}
19
/// Resamples every plane of `input` to `dest_width` columns while keeping the
/// plane heights unchanged, returning the resized frame.
///
/// `dest_width` is the target width of the luma plane; chroma plane widths
/// come from the new frame's plane configs (presumably derived from
/// `chroma_sampling` by `Frame::new_with_padding` — TODO confirm in v_frame).
/// `bit_depth` bounds output samples to `(1 << bit_depth) - 1`.
pub fn resize_horizontal<T: Pixel, F: ResizeAlgorithm>(
    input: &Frame<T>,
    dest_width: usize,
    bit_depth: usize,
) -> Frame<T> {
    let chroma_sampling = get_chroma_sampling(input);
    // Largest valid sample value for this bit depth; used to clamp output.
    let pixel_max = (1i32 << bit_depth) - 1_i32;

    let mut output: Frame<T> =
        Frame::new_with_padding(dest_width, input.planes[0].cfg.height, chroma_sampling, 0);
    // Monochrome (Cs400) frames only carry a luma plane; otherwise do Y, U, V.
    for p in 0..(if chroma_sampling == ChromaSampling::Cs400 {
        1
    } else {
        3
    }) {
        let src_width = input.planes[p].cfg.width;
        // Shadows the parameter with this plane's (possibly subsampled) width.
        let dest_width = output.planes[p].cfg.width;
        let filter = compute_filter::<F>(src_width, dest_width, 0.0, src_width as f64);

        // Each output row is an independent 1-D convolution of its input row.
        for (in_row, out_row) in input.planes[p]
            .rows_iter()
            .zip(output.planes[p].rows_iter_mut())
        {
            // SAFETY: We control the size and bounds
            unsafe {
                #[allow(clippy::needless_range_loop)]
                for j in 0..dest_width {
                    // First input column touched by output column `j`'s taps.
                    let top = *filter.left.get_unchecked(j);
                    let mut accum = 0i32;

                    // Accumulate Q14 fixed-point taps over sign-biased samples.
                    for k in 0..filter.filter_width {
                        let coeff =
                            i32::from(*filter.data_i16.get_unchecked(j * filter.stride_i16 + k));
                        let x = unpack_pixel_u16(in_row.get_unchecked(top + k).to_u16().unwrap());
                        accum += coeff * x;
                    }

                    // Round, remove the sign bias, and clamp to [0, pixel_max].
                    *out_row.get_unchecked_mut(j) = T::cast_from(pack_pixel_u16(accum, pixel_max));
                }
            }
        }
    }
    output
}
64
/// Resamples every plane of `input` to `dest_height` rows while keeping the
/// plane widths unchanged, returning the resized frame.
///
/// `dest_height` is the target height of the luma plane; chroma plane heights
/// come from the new frame's plane configs (presumably derived from
/// `chroma_sampling` by `Frame::new_with_padding` — TODO confirm in v_frame).
/// `bit_depth` bounds output samples to `(1 << bit_depth) - 1`.
pub fn resize_vertical<T: Pixel, F: ResizeAlgorithm>(
    input: &Frame<T>,
    dest_height: usize,
    bit_depth: usize,
) -> Frame<T> {
    let chroma_sampling = get_chroma_sampling(input);
    // Largest valid sample value for this bit depth; used to clamp output.
    let pixel_max = (1i32 << bit_depth) - 1_i32;

    let mut output: Frame<T> =
        Frame::new_with_padding(input.planes[0].cfg.width, dest_height, chroma_sampling, 0);
    // Monochrome (Cs400) frames only carry a luma plane; otherwise do Y, U, V.
    for p in 0..(if chroma_sampling == ChromaSampling::Cs400 {
        1
    } else {
        3
    }) {
        let src_height = input.planes[p].cfg.height;
        // Shadows the parameter with this plane's (possibly subsampled) height.
        let dest_height = output.planes[p].cfg.height;
        let src_width = input.planes[p].cfg.width;
        let src_stride = input.planes[p].cfg.stride;
        let dest_stride = output.planes[p].cfg.stride;
        let input_data = input.planes[p].data_origin();
        let output_data = output.planes[p].data_origin_mut();
        let filter = compute_filter::<F>(src_height, dest_height, 0.0, src_height as f64);

        for i in 0..dest_height {
            // SAFETY: We control the size and bounds
            unsafe {
                // Coefficient row for output row `i`, plus the first input row
                // its taps touch.
                let filter_coeffs = filter.data_i16.as_ptr().add(i * filter.stride_i16);
                let top = *filter.left.get_unchecked(i);

                for j in 0..src_width {
                    let mut accum = 0i32;

                    // Accumulate Q14 fixed-point taps over sign-biased samples,
                    // walking down column `j`.
                    for k in 0..filter.filter_width {
                        let coeff = i32::from(*filter_coeffs.add(k));
                        let x = unpack_pixel_u16(
                            input_data
                                .get_unchecked((top + k) * src_stride + j)
                                .to_u16()
                                .unwrap(),
                        );
                        accum += coeff * x;
                    }

                    // Round, remove the sign bias, and clamp to [0, pixel_max].
                    *output_data.get_unchecked_mut(i * dest_stride + j) =
                        T::cast_from(pack_pixel_u16(accum, pixel_max));
                }
            }
        }
    }
    output
}
117
/// Re-centers an unsigned sample around zero: `0..=65535` maps to
/// `-32768..=32767`, the signed range the i16 filter coefficients are
/// multiplied against.
#[inline(always)]
fn unpack_pixel_u16(x: u16) -> i32 {
    // Adding i16::MIN subtracts the 32768 bias.
    const BIAS: i32 = i16::MIN as i32;
    i32::from(x) + BIAS
}
122
/// Converts a Q14 fixed-point accumulator back into an unsigned sample,
/// rounding to nearest, undoing the bias applied by `unpack_pixel_u16`, and
/// clamping the result to `[0, pixel_max]`.
#[inline(always)]
fn pack_pixel_u16(x: i32, pixel_max: i32) -> u16 {
    // Round the 14 fractional bits to nearest (add half, shift down).
    let rounded = (x + (1_i32 << 13usize)) >> 14usize;
    // Subtracting i16::MIN restores the 32768 bias.
    let unbiased = rounded - i32::from(i16::MIN);
    unbiased.clamp(0_i32, pixel_max) as u16
}
130
/// Specifies the target resolution for the resized image.
#[derive(Debug, Clone, Copy)]
pub struct ResizeDimensions {
    // Target width in pixels.
    pub width: usize,
    // Target height in pixels.
    pub height: usize,
}
137
/// A resampling kernel used by `compute_filter` to build filter coefficients.
pub trait ResizeAlgorithm {
    /// One-sided support (radius) of the kernel; `compute_filter` uses twice
    /// this value, scaled by the resize step, as the tap count.
    fn support() -> u32;
    /// Constructs an instance of the kernel.
    fn new() -> Self;
    /// Evaluates the kernel weight at (scaled) distance `x` from its center.
    fn process(&self, x: f64) -> f64;
}
143
/// A precomputed, row-compressed resampling filter: one row of contiguous
/// taps per output sample, stored in Q14 fixed point.
struct FilterContext {
    // Number of taps applied per output sample.
    filter_width: usize,
    // TODO: Enable these fields if v_frame ever supports f32 types
    // filter_rows: usize,
    // input_width: usize,
    // stride: usize,
    // data: Box<[f32]>,
    // Row stride (in elements) of `data_i16`.
    stride_i16: usize,
    // Q14 fixed-point coefficients; each row sums to 1 << 14.
    data_i16: Box<[i16]>,
    // For each output sample, the index of the first input sample its taps touch.
    left: Box<[usize]>,
}
155
156fn compute_filter<F: ResizeAlgorithm>(
157    src_dim: usize,
158    dest_dim: usize,
159    shift: f64,
160    width: f64,
161) -> FilterContext {
162    let scale = dest_dim as f64 / width;
163    let step = scale.min(1.0);
164    let support = f64::from(F::support()) / step;
165    let filter_size = (support.ceil() as usize * 2).max(1);
166    let f = F::new();
167    // This represents a row-major matrix with dest_dim rows and src_dim cols
168    //
169    // TODO: We should be able to represent this as a compressed sparse matrix
170    // to reduce memory usage.
171    let mut m: Vec<f64> = vec![0.0_f64; dest_dim * src_dim];
172
173    let src_dim_f = src_dim as f64;
174    for i in 0..dest_dim {
175        // Position of output sample on input grid.
176        let pos = (i as f64 + 0.5_f64) / scale + shift;
177        let begin_pos = round_halfup((filter_size as f64).mul_add(-0.5, pos)) + 0.5_f64;
178
179        let mut total = 0.0_f64;
180        for j in 0..filter_size {
181            let xpos = begin_pos + j as f64;
182            total += f.process((xpos - pos) * step);
183        }
184
185        let mut left = usize::MAX;
186
187        for j in 0..filter_size {
188            let xpos = begin_pos + j as f64;
189
190            // Mirror the position if it goes beyond image bounds.
191            let real_pos = if xpos < 0.0_f64 {
192                -xpos
193            } else if xpos >= src_dim_f {
194                2.0f64.mul_add(src_dim_f, -xpos)
195            } else {
196                xpos
197            };
198
199            // Clamp the position if it is still out of bounds.
200            let real_pos = real_pos.max(0.0);
201
202            let idx = (real_pos.floor() as usize).min(src_dim - 1);
203            // SAFETY: We control the size and bounds
204            unsafe {
205                *m.get_unchecked_mut(i * src_dim + idx) += f.process((xpos - pos) * step) / total;
206            }
207            left = left.min(idx);
208        }
209    }
210
211    matrix_to_filter(&m, src_dim)
212}
213
214fn matrix_to_filter(m: &[f64], input_width: usize) -> FilterContext {
215    assert!(!m.is_empty());
216
217    let height = m.len() / input_width;
218    let width = m.chunks_exact(input_width).fold(0, |max, row| {
219        let mut first = None;
220        let mut last = None;
221        for (idx, val) in row.iter().enumerate() {
222            // We want to find the first and last index that have a non-zero value.
223            if first.is_none() {
224                if *val == 0.0_f64 {
225                    continue;
226                }
227                first = Some(idx);
228            }
229            if *val == 0.0_f64 {
230                // This is the end of the non-sparse values.
231                break;
232            }
233            last = Some(idx);
234        }
235        let width = last.unwrap() + 1 - first.unwrap();
236        max.max(width)
237    });
238    // TODO: Enable this code if v_frame ever supports f32 types
239    // let stride = ceil_n(width, align_of::<f32>());
240    let stride_i16 = ceil_n(width, align_of::<u16>());
241    let mut e = FilterContext {
242        filter_width: width,
243        // TODO: Enable these fields if v_frame ever supports f32 types
244        // filter_rows: height,
245        // input_width: m.ncols(),
246        // stride,
247        // data: vec![0.0; stride * height].into_boxed_slice(),
248        stride_i16,
249        data_i16: vec![0; stride_i16 * height].into_boxed_slice(),
250        left: vec![0; height].into_boxed_slice(),
251    };
252
253    for (i, row) in m.chunks_exact(input_width).enumerate() {
254        let left = row
255            .iter()
256            .position(|val| *val != 0.0_f64)
257            .unwrap()
258            .min(row.len() - width);
259        let mut f32_err = 0.0_f64;
260        let mut i16_err = 0.0_f64;
261        let mut f32_sum = 0.0_f64;
262        let mut i16_sum = 0_i16;
263        let mut i16_greatest = 0_i16;
264        let mut i16_greatest_idx = 0usize;
265
266        // Dither filter coefficients when rounding them to their storage format.
267        // This minimizes accumulation of error and ensures that the filter
268        // continues to sum as close to 1.0 as possible after rounding.
269        for j in 0..width {
270            // SAFETY: We control the size and bounds
271            unsafe {
272                let coeff = *row.get_unchecked(left + j);
273
274                let coeff_expected_f32 = coeff - f32_err;
275                let coeff_expected_i16 = coeff.mul_add(f64::from(1i16 << 14usize), -i16_err);
276
277                let coeff_f32 = coeff_expected_f32 as f32;
278                let coeff_i16 = coeff_expected_i16.round() as i16;
279
280                f32_err = coeff_expected_f32 as f64 - coeff_expected_f32;
281                i16_err = coeff_expected_i16 as f64 - coeff_expected_i16;
282
283                if coeff_i16.abs() > i16_greatest {
284                    i16_greatest = coeff_i16;
285                    i16_greatest_idx = j;
286                }
287
288                f32_sum += f64::from(coeff_f32);
289                i16_sum += coeff_i16;
290
291                // TODO: Enable this code if v_frame ever supports f32 types
292                // *e.data.get_unchecked_mut(i * stride + j) = coeff_f32;
293                *e.data_i16.get_unchecked_mut(i * stride_i16 + j) = coeff_i16;
294            }
295        }
296
297        /* The final sum may still be off by a few ULP. This can not be fixed for
298         * floating point data, since the error is dependent on summation order,
299         * but for integer data, the error can be added to the greatest coefficient.
300         */
301        debug_assert!(
302            1.0_f64 - f32_sum <= f64::from(f32::EPSILON),
303            "error too great"
304        );
305        debug_assert!((1i16 << 14usize) - i16_sum <= 1, "error too great");
306
307        // SAFETY: We control the size and bounds
308        unsafe {
309            *e.data_i16
310                .get_unchecked_mut(i * e.stride_i16 + i16_greatest_idx) +=
311                (1i16 << 14usize) - i16_sum;
312            *e.left.get_unchecked_mut(i) = left;
313        }
314    }
315
316    e
317}