yuvutils_rs/
y_with_alpha_to_rgb.rs

1/*
2 * Copyright (c) Radzivon Bartoshyk, 10/2024. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1.  Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 *
14 * 3.  Neither the name of the copyright holder nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
30use crate::neon::neon_y_to_rgb_alpha_row;
31use crate::numerics::qrshr;
32use crate::yuv_error::check_rgba_destination;
33use crate::yuv_support::*;
34use crate::{YuvError, YuvGrayAlphaImage};
35use num_traits::AsPrimitive;
36#[cfg(feature = "rayon")]
37use rayon::iter::{IndexedParallelIterator, ParallelIterator};
38#[cfg(feature = "rayon")]
39use rayon::prelude::{ParallelSlice, ParallelSliceMut};
40use std::fmt::Debug;
41use std::marker::PhantomData;
42
/// Dispatcher for the optional SIMD row kernels, parameterised by the sample type `T`.
///
/// The flag fields hold CPU-feature decisions so that they are made once per
/// processor instance rather than once per row. Each flag only exists on the
/// target it is relevant for.
struct WideRowProcessor<T> {
    /// Whether the RDM-based NEON kernel may be selected.
    #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
    _use_rdm: bool,
    /// Whether the SSE4.1 kernel may be selected.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    _use_sse: bool,
    /// Whether the AVX2 kernel may be selected.
    #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "avx"))]
    _use_avx: bool,
    /// Ties the processor to its sample type without storing a value of it.
    _phantom: PhantomData<T>,
}
52
53impl<V> Default for WideRowProcessor<V> {
54    fn default() -> Self {
55        WideRowProcessor {
56            _phantom: PhantomData,
57            #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
58            _use_rdm: std::arch::is_aarch64_feature_detected!("rdm"),
59            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
60            _use_sse: std::arch::is_x86_feature_detected!("sse4.1"),
61            #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "avx"))]
62            _use_avx: std::arch::is_x86_feature_detected!("avx2"),
63        }
64    }
65}
66
67trait ProcessRowHandler<V> {
68    fn handle_row<const PRECISION: i32, const DESTINATION_CHANNELS: u8>(
69        &self,
70        range: &YuvChromaRange,
71        transform: &CbCrInverseTransform<i32>,
72        y_plane: &[V],
73        a_plane: &[V],
74        rgba: &mut [V],
75        start_cx: usize,
76        width: usize,
77    ) -> usize;
78}
79
80impl ProcessRowHandler<u16> for WideRowProcessor<u16> {
81    fn handle_row<const PRECISION: i32, const DESTINATION_CHANNELS: u8>(
82        &self,
83        _range: &YuvChromaRange,
84        _transform: &CbCrInverseTransform<i32>,
85        _y_plane: &[u16],
86        _a_plane: &[u16],
87        _rgba: &mut [u16],
88        _start_cx: usize,
89        _width: usize,
90    ) -> usize {
91        0
92    }
93}
94
95impl ProcessRowHandler<u8> for WideRowProcessor<u8> {
96    fn handle_row<const PRECISION: i32, const DESTINATION_CHANNELS: u8>(
97        &self,
98        _range: &YuvChromaRange,
99        _transform: &CbCrInverseTransform<i32>,
100        _y_plane: &[u8],
101        _a_plane: &[u8],
102        _rgba: &mut [u8],
103        _start_cx: usize,
104        _width: usize,
105    ) -> usize {
106        let mut _cx = _start_cx;
107        #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
108        unsafe {
109            let neon_wide_row_handler = if self._use_rdm {
110                #[cfg(feature = "rdm")]
111                {
112                    use crate::neon::neon_y_to_rgb_row_alpha_rdm;
113                    neon_y_to_rgb_row_alpha_rdm::<DESTINATION_CHANNELS>
114                }
115                #[cfg(not(feature = "rdm"))]
116                {
117                    neon_y_to_rgb_alpha_row::<DESTINATION_CHANNELS>
118                }
119            } else {
120                neon_y_to_rgb_alpha_row::<DESTINATION_CHANNELS>
121            };
122
123            let offset =
124                neon_wide_row_handler(_range, _transform, _y_plane, _a_plane, _rgba, _cx, _width);
125            _cx = offset;
126        }
127        #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "avx"))]
128        if self._use_avx {
129            use crate::avx2::avx2_y_to_rgba_alpha_row;
130            let offset = avx2_y_to_rgba_alpha_row::<DESTINATION_CHANNELS>(
131                _range, _transform, _y_plane, _a_plane, _rgba, _cx, _width,
132            );
133            _cx = offset;
134        }
135        #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
136        if self._use_sse {
137            use crate::sse::sse_y_to_rgba_alpha_row;
138            let offset = sse_y_to_rgba_alpha_row::<DESTINATION_CHANNELS>(
139                _range, _transform, _y_plane, _a_plane, _rgba, _cx, _width,
140            );
141            _cx = offset;
142        }
143        _cx
144    }
145}
146
147// Chroma subsampling always assumed as 400
148#[inline]
149fn y_with_alpha_to_rgbx<
150    V: Copy + AsPrimitive<i16> + 'static + Send + Sync + Debug + Default + Clone,
151    const DESTINATION_CHANNELS: u8,
152    const BIT_DEPTH: usize,
153>(
154    image: &YuvGrayAlphaImage<V>,
155    rgba: &mut [V],
156    rgba_stride: u32,
157    range: YuvRange,
158    matrix: YuvStandardMatrix,
159) -> Result<(), YuvError>
160where
161    i32: AsPrimitive<V>,
162    WideRowProcessor<V>: ProcessRowHandler<V>,
163{
164    let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into();
165    let channels = destination_channels.get_channels_count();
166    assert!(
167        destination_channels.has_alpha(),
168        "YUV400 with alpha cannot be called on target image without alpha"
169    );
170    assert_eq!(
171        channels, 4,
172        "YUV400 with alpha cannot be called on target image without alpha"
173    );
174    assert!(
175        (8..=16).contains(&BIT_DEPTH),
176        "Invalid bit depth is provided"
177    );
178
179    check_rgba_destination(rgba, rgba_stride, image.width, image.height, channels)?;
180    image.check_constraints()?;
181
182    let chroma_range = get_yuv_range(BIT_DEPTH as u32, range);
183    let kr_kb = matrix.get_kr_kb();
184    const PRECISION: i32 = 13;
185    let inverse_transform =
186        search_inverse_transform(PRECISION, 8, range, matrix, chroma_range, kr_kb);
187    let y_coef = inverse_transform.y_coef as i16;
188    let bias_y = chroma_range.bias_y as i16;
189
190    let iter;
191    let y_iter;
192    let a_iter;
193    #[cfg(feature = "rayon")]
194    {
195        iter = rgba.par_chunks_exact_mut(rgba_stride as usize);
196        y_iter = image.y_plane.par_chunks_exact(image.y_stride as usize);
197        a_iter = image.a_plane.par_chunks_exact(image.a_stride as usize);
198    }
199    #[cfg(not(feature = "rayon"))]
200    {
201        iter = rgba.chunks_exact_mut(rgba_stride as usize);
202        y_iter = image.y_plane.chunks_exact(image.y_stride as usize);
203        a_iter = image.a_plane.chunks_exact(image.a_stride as usize);
204    }
205
206    if range == YuvRange::Limited {
207        let handler = WideRowProcessor::<V>::default();
208        iter.zip(y_iter)
209            .zip(a_iter)
210            .for_each(|((rgba, y_plane), a_plane)| {
211                let y_plane = &y_plane[0..image.width as usize];
212                let mut _cx = 0usize;
213
214                let offset = handler.handle_row::<PRECISION, DESTINATION_CHANNELS>(
215                    &chroma_range,
216                    &inverse_transform,
217                    y_plane,
218                    a_plane,
219                    rgba,
220                    _cx,
221                    image.width as usize,
222                );
223                _cx = offset;
224
225                for ((y_src, a_src), rgba) in y_plane
226                    .iter()
227                    .zip(a_plane)
228                    .zip(rgba.chunks_exact_mut(channels))
229                    .skip(_cx)
230                {
231                    let y_value = (y_src.as_() - bias_y) as i32 * y_coef as i32;
232
233                    let r = qrshr::<PRECISION, BIT_DEPTH>(y_value);
234                    rgba[destination_channels.get_r_channel_offset()] = r.as_();
235                    rgba[destination_channels.get_g_channel_offset()] = r.as_();
236                    rgba[destination_channels.get_b_channel_offset()] = r.as_();
237                    rgba[destination_channels.get_a_channel_offset()] = *a_src;
238                }
239            });
240    } else {
241        iter.zip(y_iter)
242            .zip(a_iter)
243            .for_each(|((rgba, y_plane), a_plane)| {
244                let y_plane = &y_plane[0..image.width as usize];
245                for ((y_src, a_src), rgba) in y_plane
246                    .iter()
247                    .zip(a_plane)
248                    .zip(rgba.chunks_exact_mut(channels))
249                {
250                    let y_value = *y_src;
251                    rgba[destination_channels.get_r_channel_offset()] = y_value;
252                    rgba[destination_channels.get_g_channel_offset()] = y_value;
253                    rgba[destination_channels.get_b_channel_offset()] = y_value;
254                    rgba[destination_channels.get_a_channel_offset()] = *a_src;
255                }
256            });
257    }
258
259    Ok(())
260}
261
262/// Convert YUV 400 planar format with alpha plane to RGBA format.
263///
264/// This function takes YUV 400 planar format data with 8-bit precision,
265/// and converts it to RGBA format with 8-bit per channel precision.
266///
267/// # Arguments
268///
269/// * `gray_alpha_image` - Source gray image with alpha.
270/// * `rgba` - A mutable slice to store the converted RGBA data.
271/// * `rgba_stride` - Elements per row.
272/// * `range` - The YUV range (limited or full).
273/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
274///
275/// # Panics
276///
277/// This function panics if the lengths of the planes or the input BGRA data are not valid based
278/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided.
279///
280pub fn yuv400_alpha_to_rgba(
281    gray_alpha_image: &YuvGrayAlphaImage<u8>,
282    rgba: &mut [u8],
283    rgba_stride: u32,
284    range: YuvRange,
285    matrix: YuvStandardMatrix,
286) -> Result<(), YuvError> {
287    y_with_alpha_to_rgbx::<u8, { YuvSourceChannels::Rgba as u8 }, 8>(
288        gray_alpha_image,
289        rgba,
290        rgba_stride,
291        range,
292        matrix,
293    )
294}
295
296/// Convert YUV 400 planar format with alpha plane to BGRA format.
297///
298/// This function takes YUV 400 planar format data with 8-bit precision,
299/// and converts it to BGRA format with 8-bit per channel precision.
300///
301/// # Arguments
302///
303/// * `gray_alpha_image` - Source gray image with alpha.
304/// * `bgra` - A mutable slice to store the converted BGRA data.
305/// * `bgra_stride` - Elements per row.
306/// * `range` - The YUV range (limited or full).
307/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
308///
309/// # Panics
310///
311/// This function panics if the lengths of the planes or the input BGRA data are not valid based
312/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided.
313///
314pub fn yuv400_alpha_to_bgra(
315    gray_alpha_image: &YuvGrayAlphaImage<u8>,
316    bgra: &mut [u8],
317    bgra_stride: u32,
318    range: YuvRange,
319    matrix: YuvStandardMatrix,
320) -> Result<(), YuvError> {
321    y_with_alpha_to_rgbx::<u8, { YuvSourceChannels::Bgra as u8 }, 8>(
322        gray_alpha_image,
323        bgra,
324        bgra_stride,
325        range,
326        matrix,
327    )
328}