yuvutils_rs/
to_identity.rs

1/*
2 * Copyright (c) Radzivon Bartoshyk, 10/2024. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1.  Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 *
14 * 3.  Neither the name of the copyright holder nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::numerics::qrshr;
30use crate::yuv_error::check_rgba_destination;
31use crate::yuv_support::{get_yuv_range, YuvSourceChannels};
32use crate::{YuvChromaSubsampling, YuvError, YuvPlanarImageMut, YuvRange};
33use num_traits::AsPrimitive;
34use std::fmt::Debug;
35use std::mem::size_of;
36
/// Function pointer type of a full-range row converter.
///
/// The arguments are the destination slices of the Y, U and V planes followed by the
/// interleaved source pixels. The pointer is `unsafe` because `make_full_converter` may
/// return a `#[target_feature]` variant that is only sound to call on a CPU supporting
/// that feature.
type RgbFullHandler<V> =
    unsafe fn(y_plane: &mut [V], u_plane: &mut [V], v_plane: &mut [V], rgba: &[V]);
39
/// Function pointer type of a limited-range row converter.
///
/// Takes the same plane and pixel slices as `RgbFullHandler`, plus the fixed-point scale
/// (`y_coef`) and the pre-scaled bias (`y_bias`) applied to every channel. The pointer is
/// `unsafe` because `make_limited_converter` may return a `#[target_feature]` variant.
type RgbLimitedHandler<V, J> = unsafe fn(
    y_plane: &mut [V],
    u_plane: &mut [V],
    v_plane: &mut [V],
    rgba: &[V],
    y_coef: J,
    y_bias: i32,
);
48
49#[inline(always)]
50unsafe fn default_full_converter<V: Copy + 'static, const CN: u8>(
51    y_plane: &mut [V],
52    u_plane: &mut [V],
53    v_plane: &mut [V],
54    rgba: &[V],
55) {
56    let cn: YuvSourceChannels = CN.into();
57
58    for (((y_dst, u_dst), v_dst), rgb_dst) in y_plane
59        .iter_mut()
60        .zip(u_plane.iter_mut())
61        .zip(v_plane.iter_mut())
62        .zip(rgba.chunks_exact(cn.get_channels_count()))
63    {
64        *v_dst = rgb_dst[cn.get_r_channel_offset()];
65        *y_dst = rgb_dst[cn.get_g_channel_offset()];
66        *u_dst = rgb_dst[cn.get_b_channel_offset()];
67    }
68}
69
70#[inline(always)]
71unsafe fn default_limited_converter<
72    V: Copy + 'static + AsPrimitive<i32>,
73    J: Copy + AsPrimitive<i32>,
74    const CN: u8,
75    const BIT_DEPTH: usize,
76    const PRECISION: i32,
77>(
78    y_plane: &mut [V],
79    u_plane: &mut [V],
80    v_plane: &mut [V],
81    rgba: &[V],
82    y_coef: J,
83    y_bias: i32,
84) where
85    i32: AsPrimitive<V>,
86{
87    const PRECISION: i32 = 13;
88    let cn: YuvSourceChannels = CN.into();
89
90    // All channels on identity should use Y range
91    for (((y_dst, u_dst), v_dst), rgb_dst) in y_plane
92        .iter_mut()
93        .zip(u_plane.iter_mut())
94        .zip(v_plane.iter_mut())
95        .zip(rgba.chunks_exact(cn.get_channels_count()))
96    {
97        let c_coef: i32 = y_coef.as_();
98        *v_dst = qrshr::<PRECISION, BIT_DEPTH>(
99            rgb_dst[cn.get_r_channel_offset()].as_() * c_coef + y_bias,
100        )
101        .as_();
102        *y_dst = qrshr::<PRECISION, BIT_DEPTH>(
103            rgb_dst[cn.get_g_channel_offset()].as_() * c_coef + y_bias,
104        )
105        .as_();
106        *u_dst = qrshr::<PRECISION, BIT_DEPTH>(
107            rgb_dst[cn.get_b_channel_offset()].as_() * c_coef + y_bias,
108        )
109        .as_();
110    }
111}
112
/// AVX2-enabled instantiation of `default_full_converter`.
///
/// The body only forwards to the scalar implementation, which is `#[inline(always)]`;
/// compiling it under `#[target_feature(enable = "avx2")]` presumably lets the compiler
/// vectorize the inlined loop with AVX2 instructions.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "avx"))]
#[target_feature(enable = "avx2")]
unsafe fn default_full_converter_avx2<V: Copy + 'static, const CN: u8>(
    y_plane: &mut [V],
    u_plane: &mut [V],
    v_plane: &mut [V],
    rgba: &[V],
) {
    default_full_converter::<V, CN>(y_plane, u_plane, v_plane, rgba);
}
123
/// AVX2-enabled instantiation of `default_limited_converter`.
///
/// Forwards every generic parameter and argument unchanged to the scalar implementation;
/// compiling it under `#[target_feature(enable = "avx2")]` presumably lets the compiler
/// vectorize the inlined loop with AVX2 instructions.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "avx"))]
#[target_feature(enable = "avx2")]
unsafe fn default_limited_converter_avx2<
    V: Copy + 'static + AsPrimitive<i32>,
    J: Copy + AsPrimitive<i32>,
    const CN: u8,
    const BIT_DEPTH: usize,
    const PRECISION: i32,
>(
    y_plane: &mut [V],
    u_plane: &mut [V],
    v_plane: &mut [V],
    rgba: &[V],
    y_coef: J,
    y_bias: i32,
) where
    i32: AsPrimitive<V>,
{
    default_limited_converter::<V, J, CN, BIT_DEPTH, PRECISION>(
        y_plane, u_plane, v_plane, rgba, y_coef, y_bias,
    );
}
146
/// SSE4.1-enabled instantiation of `default_full_converter`.
///
/// The body only forwards to the scalar implementation, which is `#[inline(always)]`;
/// compiling it under `#[target_feature(enable = "sse4.1")]` presumably lets the compiler
/// vectorize the inlined loop with SSE4.1 instructions.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE4.1.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
#[target_feature(enable = "sse4.1")]
unsafe fn default_full_converter_sse4_1<V: Copy + 'static, const CN: u8>(
    y_plane: &mut [V],
    u_plane: &mut [V],
    v_plane: &mut [V],
    rgba: &[V],
) {
    default_full_converter::<V, CN>(y_plane, u_plane, v_plane, rgba);
}
157
/// SSE4.1-enabled instantiation of `default_limited_converter`.
///
/// Forwards every generic parameter and argument unchanged to the scalar implementation;
/// compiling it under `#[target_feature(enable = "sse4.1")]` presumably lets the compiler
/// vectorize the inlined loop with SSE4.1 instructions.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE4.1.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
#[target_feature(enable = "sse4.1")]
unsafe fn default_limited_converter_sse4_1<
    V: Copy + 'static + AsPrimitive<i32>,
    J: Copy + AsPrimitive<i32>,
    const CN: u8,
    const BIT_DEPTH: usize,
    const PRECISION: i32,
>(
    y_plane: &mut [V],
    u_plane: &mut [V],
    v_plane: &mut [V],
    rgba: &[V],
    y_coef: J,
    y_bias: i32,
) where
    i32: AsPrimitive<V>,
{
    default_limited_converter::<V, J, CN, BIT_DEPTH, PRECISION>(
        y_plane, u_plane, v_plane, rgba, y_coef, y_bias,
    );
}
180
/// AVX-512BW-enabled instantiation of `default_full_converter`.
///
/// Only compiled with the `nightly_avx512` feature. The body only forwards to the scalar
/// implementation, which is `#[inline(always)]`; compiling it under
/// `#[target_feature(enable = "avx512bw")]` presumably lets the compiler vectorize the
/// inlined loop with AVX-512 instructions.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX-512BW.
#[cfg(all(
    any(target_arch = "x86", target_arch = "x86_64"),
    feature = "nightly_avx512"
))]
#[target_feature(enable = "avx512bw")]
unsafe fn default_full_converter_avx512<V: Copy + 'static, const CN: u8>(
    y_plane: &mut [V],
    u_plane: &mut [V],
    v_plane: &mut [V],
    rgba: &[V],
) {
    default_full_converter::<V, CN>(y_plane, u_plane, v_plane, rgba);
}
194
/// AVX-512BW-enabled instantiation of `default_limited_converter`.
///
/// Only compiled with the `nightly_avx512` feature. Forwards every generic parameter and
/// argument unchanged to the scalar implementation; compiling it under
/// `#[target_feature(enable = "avx512bw")]` presumably lets the compiler vectorize the
/// inlined loop with AVX-512 instructions.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX-512BW.
#[cfg(all(
    any(target_arch = "x86", target_arch = "x86_64"),
    feature = "nightly_avx512"
))]
#[target_feature(enable = "avx512bw")]
unsafe fn default_limited_converter_avx512<
    V: Copy + 'static + AsPrimitive<i32>,
    J: Copy + AsPrimitive<i32>,
    const CN: u8,
    const BIT_DEPTH: usize,
    const PRECISION: i32,
>(
    y_plane: &mut [V],
    u_plane: &mut [V],
    v_plane: &mut [V],
    rgba: &[V],
    y_coef: J,
    y_bias: i32,
) where
    i32: AsPrimitive<V>,
{
    default_limited_converter::<V, J, CN, BIT_DEPTH, PRECISION>(
        y_plane, u_plane, v_plane, rgba, y_coef, y_bias,
    );
}
220
/// Selects the full-range row converter for the CPU the program is running on.
///
/// On x86/x86_64 the candidates are probed at runtime in the order AVX-512BW, AVX2, SSE4.1.
/// Each probe exists only when its cargo feature (`nightly_avx512`, `avx`, `sse`) is
/// compiled in, and the first detected variant is returned. On any other target, or when no
/// probe succeeds, the plain `default_full_converter` is returned.
fn make_full_converter<V: Copy + 'static, const CN: u8>() -> RgbFullHandler<V> {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        // Widest instruction set first; each branch vanishes when its feature is disabled.
        #[cfg(feature = "nightly_avx512")]
        if std::arch::is_x86_feature_detected!("avx512bw") {
            return default_full_converter_avx512::<V, CN>;
        }
        #[cfg(feature = "avx")]
        if std::arch::is_x86_feature_detected!("avx2") {
            return default_full_converter_avx2::<V, CN>;
        }
        #[cfg(feature = "sse")]
        if std::arch::is_x86_feature_detected!("sse4.1") {
            return default_full_converter_sse4_1::<V, CN>;
        }
    }
    // Portable fallback.
    default_full_converter::<V, CN>
}
239
/// Selects the limited-range row converter for the CPU the program is running on.
///
/// On x86/x86_64 the candidates are probed at runtime in the order AVX-512BW, AVX2, SSE4.1.
/// Each probe exists only when its cargo feature (`nightly_avx512`, `avx`, `sse`) is
/// compiled in, and the first detected variant is returned. On any other target, or when no
/// probe succeeds, the plain `default_limited_converter` is returned. All generic
/// parameters are passed through unchanged to the chosen function.
fn make_limited_converter<
    V: Copy + 'static + AsPrimitive<i32>,
    J: Copy + AsPrimitive<i32>,
    const CN: u8,
    const BIT_DEPTH: usize,
    const PRECISION: i32,
>() -> RgbLimitedHandler<V, J>
where
    i32: AsPrimitive<V>,
{
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        // Widest instruction set first; each branch vanishes when its feature is disabled.
        #[cfg(feature = "nightly_avx512")]
        if std::arch::is_x86_feature_detected!("avx512bw") {
            return default_limited_converter_avx512::<V, J, CN, BIT_DEPTH, PRECISION>;
        }
        #[cfg(feature = "avx")]
        if std::arch::is_x86_feature_detected!("avx2") {
            return default_limited_converter_avx2::<V, J, CN, BIT_DEPTH, PRECISION>;
        }
        #[cfg(feature = "sse")]
        if std::arch::is_x86_feature_detected!("sse4.1") {
            return default_limited_converter_sse4_1::<V, J, CN, BIT_DEPTH, PRECISION>;
        }
    }
    // Portable fallback.
    default_limited_converter::<V, J, CN, BIT_DEPTH, PRECISION>
}
267
268fn rgbx_to_gbr_impl<
269    V: Copy + AsPrimitive<i32> + 'static + Sized + Debug,
270    J: Copy + AsPrimitive<i32>,
271    const CN: u8,
272    const BIT_DEPTH: usize,
273>(
274    image: &mut YuvPlanarImageMut<V>,
275    rgba: &[V],
276    rgba_stride: u32,
277    yuv_range: YuvRange,
278) -> Result<(), YuvError>
279where
280    i32: AsPrimitive<V> + AsPrimitive<J>,
281{
282    let cn: YuvSourceChannels = CN.into();
283    let channels = cn.get_channels_count();
284    assert!(
285        channels == 3 || channels == 4,
286        "GBR -> RGB is implemented only on 3 and 4 channels"
287    );
288    assert!(
289        (8..=16).contains(&BIT_DEPTH),
290        "Invalid bit depth is provided"
291    );
292    assert!(
293        if BIT_DEPTH > 8 {
294            size_of::<V>() == 2
295        } else {
296            size_of::<V>() == 1
297        },
298        "Unsupported bit depth and data type combination"
299    );
300
301    image.check_constraints(YuvChromaSubsampling::Yuv444)?;
302    check_rgba_destination(rgba, rgba_stride, image.width, image.height, channels)?;
303
304    let y_plane = image.y_plane.borrow_mut();
305    let u_plane = image.u_plane.borrow_mut();
306    let v_plane = image.v_plane.borrow_mut();
307    let y_stride = image.y_stride as usize;
308    let u_stride = image.u_stride as usize;
309    let v_stride = image.v_stride as usize;
310
311    let y_iter = y_plane.chunks_exact_mut(y_stride);
312    let rgba_iter = rgba.chunks_exact(rgba_stride as usize);
313    let u_iter = u_plane.chunks_exact_mut(u_stride);
314    let v_iter = v_plane.chunks_exact_mut(v_stride);
315
316    match yuv_range {
317        YuvRange::Limited => {
318            const PRECISION: i32 = 13;
319            // All channels on identity should use Y range
320            let range = get_yuv_range(BIT_DEPTH as u32, yuv_range);
321            let range_rgba = (1 << BIT_DEPTH) - 1;
322            let y_coef: J = (((range.range_y as f32 / range_rgba as f32) * (1 << PRECISION) as f32)
323                .round() as i32)
324                .as_();
325            let y_bias = range.bias_y as i32 * (1 << PRECISION);
326
327            let row_handler = make_limited_converter::<V, J, CN, BIT_DEPTH, PRECISION>();
328
329            for (((y_dst, u_dst), v_dst), rgba) in y_iter.zip(u_iter).zip(v_iter).zip(rgba_iter) {
330                let y_dst = &mut y_dst[..image.width as usize];
331                let u_dst = &mut u_dst[..image.width as usize];
332                let v_dst = &mut v_dst[..image.width as usize];
333
334                unsafe {
335                    row_handler(y_dst, u_dst, v_dst, rgba, y_coef, y_bias);
336                }
337            }
338        }
339        YuvRange::Full => {
340            let row_handler = make_full_converter::<V, CN>();
341            for (((y_dst, u_dst), v_dst), rgba) in y_iter.zip(u_iter).zip(v_iter).zip(rgba_iter) {
342                let y_dst = &mut y_dst[..image.width as usize];
343                let u_dst = &mut u_dst[..image.width as usize];
344                let v_dst = &mut v_dst[..image.width as usize];
345
346                unsafe {
347                    row_handler(y_dst, u_dst, v_dst, rgba);
348                }
349            }
350        }
351    }
352
353    Ok(())
354}
355
// Generates one public RGB(A) -> planar GBR conversion function.
//
// Parameters:
// * `$method` - name of the generated function.
// * `$px_fmt` - `YuvSourceChannels` variant describing the source layout.
// * `$clazz` - sample storage type (`u8` or `u16`).
// * `$bp` - bit depth, also used in the generated documentation.
// * `$gb_name` / `$rgb_name` - destination / source format names used in the documentation.
// * `$rgb_ident` / `$rgb_stride_ident` - names of the source slice and stride parameters.
// * `$intermediate` - integer type passed as `J` to `rgbx_to_gbr_impl`.
//
// The generated documentation names the parameters through `stringify!` so it matches the
// actual signature, and it describes validation failures as errors because the
// implementation propagates them with `?`.
macro_rules! d_cvn {
    ($method: ident, $px_fmt: expr, $clazz: ident, $bp: expr, $gb_name: expr, $rgb_name: expr, $rgb_ident: ident, $rgb_stride_ident: ident, $intermediate: ident) => {
        #[doc = concat!("Convert ",$rgb_name, stringify!($bp)," to ", $gb_name,"

This function takes ", $rgb_name," image format data with ", $bp,"-bit precision,
and converts it to ", $gb_name," YUV format.

# Arguments

* `image` - Target ", $gb_name," image.
* `", stringify!($rgb_ident), "` - A slice to load ",$rgb_name, stringify!($bp)," data.
* `", stringify!($rgb_stride_ident), "` - The stride (components per row) for the ",$rgb_name, stringify!($bp)," plane.
* `range` - Yuv values range.

# Errors

Returns a `YuvError` when validation of the `image` planes or of the input ",$rgb_name, stringify!($bp)," data
against the specified width, height, and strides fails.")]
        pub fn $method(
            image: &mut YuvPlanarImageMut<$clazz>,
            $rgb_ident: &[$clazz],
            $rgb_stride_ident: u32,
            range: YuvRange,
        ) -> Result<(), YuvError> {
            rgbx_to_gbr_impl::<$clazz, $intermediate, { $px_fmt as u8 }, $bp>(
                image, $rgb_ident, $rgb_stride_ident, range,
            )
        }
    };
}
386
// 8-bit entry points: one public function per source channel layout, all writing `u8`
// planes and using `i16` as the intermediate coefficient type.
d_cvn!(
    rgb_to_gbr,
    YuvSourceChannels::Rgb,
    u8,
    8,
    "GBR",
    "RGB",
    rgb,
    rgb_stride,
    i16
);
d_cvn!(
    bgr_to_gbr,
    YuvSourceChannels::Bgr,
    u8,
    8,
    "GBR",
    "BGR",
    bgr,
    bgr_stride,
    i16
);
d_cvn!(
    bgra_to_gbr,
    YuvSourceChannels::Bgra,
    u8,
    8,
    "GBR",
    "BGRA",
    bgra,
    bgra_stride,
    i16
);
d_cvn!(
    rgba_to_gbr,
    YuvSourceChannels::Rgba,
    u8,
    8,
    "GBR",
    "RGBA",
    rgba,
    rgba_stride,
    i16
);
// High bit-depth entry points (10, 12, 14 and 16 bits), RGB and RGBA layouts only, all
// stored in `u16` samples. The 10/12/14-bit variants use `i16` as the intermediate
// coefficient type; the 16-bit variants use `i32`.
d_cvn!(
    rgb10_to_gb10,
    YuvSourceChannels::Rgb,
    u16,
    10,
    "GB10",
    "RGB",
    rgb10,
    rgb10_stride,
    i16
);
d_cvn!(
    rgba10_to_gb10,
    YuvSourceChannels::Rgba,
    u16,
    10,
    "GB10",
    "RGBA",
    rgba10,
    rgba10_stride,
    i16
);
d_cvn!(
    rgb12_to_gb12,
    YuvSourceChannels::Rgb,
    u16,
    12,
    "GB12",
    "RGB",
    rgb12,
    rgb12_stride,
    i16
);
d_cvn!(
    rgba12_to_gb12,
    YuvSourceChannels::Rgba,
    u16,
    12,
    "GB12",
    "RGBA",
    rgba12,
    rgba12_stride,
    i16
);
d_cvn!(
    rgb14_to_gb14,
    YuvSourceChannels::Rgb,
    u16,
    14,
    "GB14",
    "RGB",
    rgb14,
    rgb14_stride,
    i16
);
d_cvn!(
    rgba14_to_gb14,
    YuvSourceChannels::Rgba,
    u16,
    14,
    "GB14",
    "RGBA",
    rgba14,
    rgba14_stride,
    i16
);
// 16-bit variants: note the `i32` intermediate type.
d_cvn!(
    rgb16_to_gb16,
    YuvSourceChannels::Rgb,
    u16,
    16,
    "GB16",
    "RGB",
    rgb16,
    rgb16_stride,
    i32
);
d_cvn!(
    rgba16_to_gb16,
    YuvSourceChannels::Rgba,
    u16,
    16,
    "GB16",
    "RGBA",
    rgba16,
    rgba16_stride,
    i32
);