yuvutils_rs/
ayuv_to_rgb.rs

1/*
2 * Copyright (c) Radzivon Bartoshyk, 2/2025. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1.  Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 *
14 * 3.  Neither the name of the copyright holder nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::yuv_error::check_rgba_destination;
30use crate::yuv_support::{
31    get_yuv_range, search_inverse_transform, CbCrInverseTransform, YuvPacked444Format,
32    YuvSourceChannels,
33};
34use crate::{YuvError, YuvPackedImage, YuvRange, YuvStandardMatrix};
35#[cfg(feature = "rayon")]
36use rayon::iter::{IndexedParallelIterator, ParallelIterator};
37#[cfg(feature = "rayon")]
38use rayon::prelude::{ParallelSlice, ParallelSliceMut};
39
/// Scalar row kernel shared by the non-SIMD executors.
///
/// Walks `$src` in 4-byte packed pixels and `$dst` in
/// `$cn.get_channels_count()`-byte pixels, converting each YUV sample to RGB
/// with fixed-point arithmetic. The macro expands to statements (not an
/// expression) and relies on a `PRECISION` constant being in scope at the
/// call site; it is forwarded to `qrshr`.
///
/// Arguments:
/// * `$src` - packed source row.
/// * `$dst` - destination row.
/// * `$premultiply_alpha` - when `true`, R, G and B are multiplied by the
///   source alpha and passed through `div_by_255` before being stored.
/// * `$ts` - inverse transform supplying `y_coef`, `cr_coef`, `cb_coef`,
///   `g_coeff_1` and `g_coeff_2`.
/// * `$bias_y` / `$bias_uv` - offsets subtracted from the luma and chroma
///   samples respectively.
/// * `$cn` - destination channel layout (offsets, channel count, alpha flag).
/// * `$packed` - source layout giving the byte positions of Y, U, V and A.
#[allow(unused, dead_code)]
macro_rules! cnv_exec {
    ($src: expr, $dst: expr, $premultiply_alpha: expr, $ts: expr, $bias_y: expr, $bias_uv: expr, $cn: expr, $packed: expr) => {
        use crate::numerics::*;
        // The loop is intentionally written twice so the alpha decision is
        // taken once per row instead of once per pixel.
        if !$premultiply_alpha {
            // Straight alpha: colour channels are stored exactly as converted.
            let pixels = $src
                .chunks_exact(4)
                .zip($dst.chunks_exact_mut($cn.get_channels_count()));
            for (yuva, out) in pixels {
                let luma = i16::from(yuva[$packed.get_y_ps()]);
                let cb = i16::from(yuva[$packed.get_u_ps()]);
                let cr = i16::from(yuva[$packed.get_v_ps()]);
                let alpha = yuva[$packed.get_a_ps()];

                // Remove the range bias in 16 bits, then widen for the multiplies.
                let scaled_luma = i32::from(luma - $bias_y) * i32::from($ts.y_coef);
                let cb = i32::from(cb - $bias_uv);
                let cr = i32::from(cr - $bias_uv);

                let red = qrshr::<PRECISION, 8>(scaled_luma + i32::from($ts.cr_coef) * cr);
                let blue = qrshr::<PRECISION, 8>(scaled_luma + i32::from($ts.cb_coef) * cb);
                let green = qrshr::<PRECISION, 8>(
                    scaled_luma - i32::from($ts.g_coeff_1) * cr - i32::from($ts.g_coeff_2) * cb,
                );

                out[$cn.get_r_channel_offset()] = red as u8;
                out[$cn.get_g_channel_offset()] = green as u8;
                out[$cn.get_b_channel_offset()] = blue as u8;
                if $cn.has_alpha() {
                    out[$cn.get_a_channel_offset()] = alpha;
                }
            }
        } else {
            // Premultiplied alpha: colour channels are scaled by alpha / 255.
            let pixels = $src
                .chunks_exact(4)
                .zip($dst.chunks_exact_mut($cn.get_channels_count()));
            for (yuva, out) in pixels {
                let luma = i16::from(yuva[$packed.get_y_ps()]);
                let cb = i16::from(yuva[$packed.get_u_ps()]);
                let cr = i16::from(yuva[$packed.get_v_ps()]);
                let alpha = yuva[$packed.get_a_ps()];

                // Remove the range bias in 16 bits, then widen for the multiplies.
                let scaled_luma = i32::from(luma - $bias_y) * i32::from($ts.y_coef);
                let cb = i32::from(cb - $bias_uv);
                let cr = i32::from(cr - $bias_uv);

                let red = qrshr::<PRECISION, 8>(scaled_luma + i32::from($ts.cr_coef) * cr);
                let blue = qrshr::<PRECISION, 8>(scaled_luma + i32::from($ts.cb_coef) * cb);
                let green = qrshr::<PRECISION, 8>(
                    scaled_luma - i32::from($ts.g_coeff_1) * cr - i32::from($ts.g_coeff_2) * cb,
                );

                let weight = u16::from(alpha);
                let red = div_by_255(red as u16 * weight);
                let blue = div_by_255(blue as u16 * weight);
                let green = div_by_255(green as u16 * weight);

                out[$cn.get_r_channel_offset()] = red as u8;
                out[$cn.get_g_channel_offset()] = green as u8;
                out[$cn.get_b_channel_offset()] = blue as u8;
                if $cn.has_alpha() {
                    out[$cn.get_a_channel_offset()] = alpha;
                }
            }
        }
    };
}
107
108type RowExecutor = unsafe fn(&[u8], &mut [u8], bool, CbCrInverseTransform<i16>, i16, i16, usize);
109
110#[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
111fn default_executor<const DST: u8, const PACKED: u8, const PRECISION: i32>(
112    src: &[u8],
113    dst: &mut [u8],
114    premultiply_alpha: bool,
115    ts: CbCrInverseTransform<i16>,
116    bias_y: i16,
117    bias_uv: i16,
118    _: usize,
119) {
120    let cn: YuvSourceChannels = DST.into();
121    let packed: YuvPacked444Format = PACKED.into();
122    cnv_exec!(src, dst, premultiply_alpha, ts, bias_y, bias_uv, cn, packed);
123}
124
/// NEON row executor: forwards the row to `crate::neon::neon_ayuv_to_rgba`.
///
/// Only compiled for `aarch64` builds that have the `neon` target feature
/// enabled. `PRECISION` is accepted for signature parity with the other
/// executors and is not forwarded to the kernel.
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
fn default_executor_neon<const DST: u8, const PACKED: u8, const PRECISION: i32>(
    src: &[u8],
    dst: &mut [u8],
    premultiply_alpha: bool,
    ts: CbCrInverseTransform<i16>,
    bias_y: i16,
    bias_uv: i16,
    width: usize,
) {
    // SAFETY: this function only exists when `neon` is statically enabled.
    // NOTE(review): presumably that is the kernel's only precondition —
    // confirm against the definition of `neon_ayuv_to_rgba`.
    unsafe {
        crate::neon::neon_ayuv_to_rgba::<DST, PACKED>(
            src,
            dst,
            &ts,
            bias_y,
            bias_uv,
            width,
            premultiply_alpha,
        );
    }
}
140
/// NEON + RDM row executor: forwards the row to
/// `crate::neon::neon_ayuv_to_rgba_rdm`.
///
/// Only compiled for `aarch64` builds with the `neon` target feature and the
/// crate feature `rdm`. `make_executor` returns it only after
/// `is_aarch64_feature_detected!("rdm")` succeeds. `PRECISION` is accepted for
/// signature parity and is not forwarded to the kernel.
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "rdm"))]
fn default_executor_neon_rdm<const DST: u8, const PACKED: u8, const PRECISION: i32>(
    src: &[u8],
    dst: &mut [u8],
    premultiply_alpha: bool,
    ts: CbCrInverseTransform<i16>,
    bias_y: i16,
    bias_uv: i16,
    width: usize,
) {
    // SAFETY: NOTE(review) — this wrapper is a safe `fn`, so soundness relies
    // on it being reached only through `make_executor`, which checks for `rdm`
    // at runtime. Confirm the kernel has no further preconditions.
    unsafe {
        crate::neon::neon_ayuv_to_rgba_rdm::<DST, PACKED>(
            src,
            dst,
            &ts,
            bias_y,
            bias_uv,
            width,
            premultiply_alpha,
        );
    }
}
164
/// AVX2 row executor: forwards the row to `crate::avx2::avx2_ayuv_to_rgba`.
///
/// Only compiled on `x86`/`x86_64` with the crate feature `avx`. `PRECISION`
/// is accepted for signature parity with the other executors and is not
/// forwarded to the kernel.
///
/// # Safety
///
/// The function is compiled with `#[target_feature(enable = "avx2")]`, so the
/// caller must make sure the running CPU supports AVX2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "avx"))]
#[target_feature(enable = "avx2")]
unsafe fn default_executor_avx2<const DST: u8, const PACKED: u8, const PRECISION: i32>(
    src: &[u8],
    dst: &mut [u8],
    premultiply_alpha: bool,
    ts: CbCrInverseTransform<i16>,
    bias_y: i16,
    bias_uv: i16,
    width: usize,
) {
    crate::avx2::avx2_ayuv_to_rgba::<DST, PACKED>(
        src,
        dst,
        &ts,
        bias_y,
        bias_uv,
        width,
        premultiply_alpha,
    );
}
179
/// SSE4.1 row executor.
///
/// Runs the same scalar `cnv_exec!` kernel as the portable executor, but the
/// function is compiled with SSE4.1 enabled. Only built on `x86`/`x86_64`
/// with the crate feature `sse`. The trailing `usize` argument exists only to
/// match [`RowExecutor`] and is ignored.
///
/// # Safety
///
/// The function is compiled with `#[target_feature(enable = "sse4.1")]`, so
/// the caller must make sure the running CPU supports SSE4.1.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
#[target_feature(enable = "sse4.1")]
unsafe fn default_executor_sse<const DST: u8, const PACKED: u8, const PRECISION: i32>(
    src: &[u8],
    dst: &mut [u8],
    premultiply_alpha: bool,
    ts: CbCrInverseTransform<i16>,
    bias_y: i16,
    bias_uv: i16,
    _: usize,
) {
    let channels: YuvSourceChannels = DST.into();
    let layout: YuvPacked444Format = PACKED.into();
    cnv_exec!(
        src,
        dst,
        premultiply_alpha,
        ts,
        bias_y,
        bias_uv,
        channels,
        layout
    );
}
195
196fn make_executor<const DST: u8, const PACKED: u8, const PRECISION: i32>() -> RowExecutor {
197    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
198    {
199        #[cfg(feature = "avx")]
200        if std::arch::is_x86_feature_detected!("avx2") {
201            return default_executor_avx2::<DST, PACKED, PRECISION>;
202        }
203        #[cfg(feature = "sse")]
204        if std::arch::is_x86_feature_detected!("sse4.1") {
205            return default_executor_sse::<DST, PACKED, PRECISION>;
206        }
207    }
208    #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
209    {
210        #[cfg(feature = "rdm")]
211        {
212            if std::arch::is_aarch64_feature_detected!("rdm") {
213                return default_executor_neon_rdm::<DST, PACKED, PRECISION>;
214            }
215        }
216        default_executor_neon::<DST, PACKED, PRECISION>
217    }
218    #[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
219    default_executor::<DST, PACKED, PRECISION>
220}
221
222fn ayuv_to_rgb_launch<const DST: u8, const PACKED: u8>(
223    image: &YuvPackedImage<u8>,
224    dst: &mut [u8],
225    dst_stride: usize,
226    range: YuvRange,
227    matrix: YuvStandardMatrix,
228    premultiply_alpha: bool,
229) -> Result<(), YuvError> {
230    let cn: YuvSourceChannels = DST.into();
231    image.check_constraints444()?;
232    check_rgba_destination(
233        dst,
234        dst_stride as u32,
235        image.width,
236        image.height,
237        cn.get_channels_count(),
238    )?;
239
240    let chroma_range = get_yuv_range(8, range);
241    let kr_kb = matrix.get_kr_kb();
242    let bias_y = chroma_range.bias_y as i16;
243    let bias_uv = chroma_range.bias_uv as i16;
244
245    const PRECISION: i32 = 13;
246
247    let ts =
248        search_inverse_transform(PRECISION, 8, range, matrix, chroma_range, kr_kb).cast::<i16>();
249
250    let iter;
251
252    #[cfg(not(feature = "rayon"))]
253    {
254        iter = image
255            .yuy
256            .chunks_exact(image.yuy_stride as usize)
257            .zip(dst.chunks_exact_mut(dst_stride));
258    }
259    #[cfg(feature = "rayon")]
260    {
261        iter = image
262            .yuy
263            .par_chunks_exact(image.yuy_stride as usize)
264            .zip(dst.par_chunks_exact_mut(dst_stride));
265    }
266
267    let mut _executor: RowExecutor = make_executor::<DST, PACKED, PRECISION>();
268
269    iter.for_each(|(src, dst)| {
270        let src = &src[0..image.width as usize * 4];
271        let dst = &mut dst[0..image.width as usize * cn.get_channels_count()];
272        unsafe {
273            _executor(
274                src,
275                dst,
276                premultiply_alpha,
277                ts,
278                bias_y,
279                bias_uv,
280                image.width as usize,
281            );
282        }
283    });
284
285    Ok(())
286}
287
/// Generates one public converter that forwards to `ayuv_to_rgb_launch`.
///
/// * `$method` - name of the generated function.
/// * `$px_fmt` - destination layout; cast to `u8` for the `DST` parameter.
/// * `$packed_fmt` - source layout; cast to `u8` for the `PACKED` parameter.
/// * `$px_fmt_name` / `$to_fmt` - names spliced into the generated doc line.
macro_rules! d_cnv {
    ($method: ident, $px_fmt: expr, $packed_fmt: expr, $px_fmt_name: expr, $to_fmt: expr) => {
        #[doc = concat!("Converts ", $px_fmt_name," to ", $to_fmt," 8-bit depth precision.")]
        ///
        /// # Arguments
        ///
        /// * `image` - Packed source image.
        /// * `dst` - Destination buffer.
        /// * `dst_stride` - Destination row stride in bytes.
        /// * `range` - YUV range of the source.
        /// * `matrix` - YUV standard matrix of the source.
        /// * `premultiply_alpha` - Whether colour channels are multiplied by alpha.
        ///
        /// # Errors
        ///
        /// Returns an error when the source image or the destination buffer
        /// fails validation.
        pub fn $method(
            image: &YuvPackedImage<u8>,
            dst: &mut [u8],
            dst_stride: u32,
            range: YuvRange,
            matrix: YuvStandardMatrix,
            premultiply_alpha: bool,
        ) -> Result<(), YuvError> {
            const DESTINATION: u8 = $px_fmt as u8;
            const SOURCE_LAYOUT: u8 = $packed_fmt as u8;
            let stride = dst_stride as usize;
            ayuv_to_rgb_launch::<DESTINATION, SOURCE_LAYOUT>(
                image,
                dst,
                stride,
                range,
                matrix,
                premultiply_alpha,
            )
        }
    };
}
310
311d_cnv!(
312    vyua_to_rgb,
313    YuvSourceChannels::Rgb,
314    YuvPacked444Format::Vuya,
315    "VUYA",
316    "RGB"
317);
318d_cnv!(
319    vyua_to_rgba,
320    YuvSourceChannels::Rgba,
321    YuvPacked444Format::Vuya,
322    "VUYA",
323    "RGBA"
324);
325
326d_cnv!(
327    ayuv_to_rgb,
328    YuvSourceChannels::Rgb,
329    YuvPacked444Format::Ayuv,
330    "AYUV",
331    "RGB"
332);
333d_cnv!(
334    ayuv_to_rgba,
335    YuvSourceChannels::Rgba,
336    YuvPacked444Format::Ayuv,
337    "AYUV",
338    "RGBA"
339);