yuvutils_rs/
yuv_nv_p10_to_ar30.rs

1/*
2 * Copyright (c) Radzivon Bartoshyk, 10/2024. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1.  Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 *
14 * 3.  Neither the name of the copyright holder nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::internals::{ProcessedOffset, RowDBiPlanarInversionHandler};
30use crate::numerics::{qrshr, to_ne};
31use crate::yuv_error::check_rgba_destination;
32use crate::yuv_support::*;
33use crate::{YuvBiPlanarImage, YuvError};
34#[cfg(feature = "rayon")]
35use rayon::iter::{IndexedParallelIterator, ParallelIterator};
36#[cfg(feature = "rayon")]
37use rayon::prelude::{ParallelSlice, ParallelSliceMut};
38
/// Signature of an accelerated (platform-specific) row kernel.
///
/// A kernel reads one row of 16-bit luma samples (`y_plane`) and one row of
/// interleaved 16-bit chroma samples (`uv_plane`) and writes converted pixels
/// into the destination byte row (`bgra`).
///
/// `start_cx` / `start_ux` are starting offsets into the row; the only caller in
/// this file passes `0` for both. The returned [`ProcessedOffset`] reports how far
/// the kernel got: callers compare its `cx` field against the image width and let
/// the scalar fallback finish whatever is left.
///
/// The pointer is `unsafe` to call.
/// NOTE(review): presumably because kernels rely on CPU features (e.g. NEON) being
/// available — confirm against the kernel implementations.
type RowHandlerFn = unsafe fn(
    y_plane: &[u16],
    uv_plane: &[u16],
    bgra: &mut [u8],
    width: u32,
    range: &YuvChromaRange,
    transform: &CbCrInverseTransform<i32>,
    start_cx: usize,
    start_ux: usize,
) -> ProcessedOffset;
49
/// Row handler that optionally wraps an accelerated kernel for the conversion
/// described by its const parameters.
///
/// The const parameters mirror those of the conversion routine so that the
/// `Default` implementation can pick the matching kernel instantiation at compile
/// time. They carry no runtime data.
struct RowHandlerBalanced<
    const AR30_LAYOUT: usize,
    const AR30_STORE: usize,
    const NV_ORDER: u8,
    const SAMPLING: u8,
    const ENDIANNESS: u8,
    const BYTES_POSITION: u8,
    const PRECISION: i32,
    const BIT_DEPTH: usize,
> {
    /// Accelerated kernel, or `None` when no kernel is available for the target,
    /// in which case the caller's scalar path converts the whole row.
    handler: Option<RowHandlerFn>,
}
62
63impl<
64        const AR30_LAYOUT: usize,
65        const AR30_STORE: usize,
66        const NV_ORDER: u8,
67        const SAMPLING: u8,
68        const ENDIANNESS: u8,
69        const BYTES_POSITION: u8,
70        const PRECISION: i32,
71        const BIT_DEPTH: usize,
72    > Default
73    for RowHandlerBalanced<
74        AR30_LAYOUT,
75        AR30_STORE,
76        NV_ORDER,
77        SAMPLING,
78        ENDIANNESS,
79        BYTES_POSITION,
80        PRECISION,
81        BIT_DEPTH,
82    >
83{
84    fn default() -> Self {
85        if PRECISION == 14 {
86            assert_eq!(PRECISION, 14);
87            #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
88            {
89                use crate::neon::neon_yuv_nv12_p10_to_ar30_row;
90                return Self {
91                    handler: Some(
92                        neon_yuv_nv12_p10_to_ar30_row::<
93                            NV_ORDER,
94                            SAMPLING,
95                            ENDIANNESS,
96                            BYTES_POSITION,
97                            AR30_LAYOUT,
98                            AR30_STORE,
99                            BIT_DEPTH,
100                        >,
101                    ),
102                };
103            }
104        }
105        Self { handler: None }
106    }
107}
108
109macro_rules! impl_row_handler_nv10_ar30 {
110    ($struct_name:ident) => {
111        impl<
112                const AR30_LAYOUT: usize,
113                const AR30_STORE: usize,
114                const NV_ORDER: u8,
115                const SAMPLING: u8,
116                const ENDIANNESS: u8,
117                const BYTES_POSITION: u8,
118                const PRECISION: i32,
119                const BIT_DEPTH: usize,
120            > RowDBiPlanarInversionHandler<u16, u8, i32>
121            for $struct_name<
122                AR30_LAYOUT,
123                AR30_STORE,
124                NV_ORDER,
125                SAMPLING,
126                ENDIANNESS,
127                BYTES_POSITION,
128                PRECISION,
129                BIT_DEPTH,
130            >
131        {
132            fn handle_row(
133                &self,
134                y_plane: &[u16],
135                uv_plane: &[u16],
136                rgba: &mut [u8],
137                width: u32,
138                chroma: YuvChromaRange,
139                transform: &CbCrInverseTransform<i32>,
140            ) -> ProcessedOffset {
141                if let Some(handler) = self.handler {
142                    unsafe {
143                        return handler(y_plane, uv_plane, rgba, width, &chroma, transform, 0, 0);
144                    }
145                }
146                ProcessedOffset { cx: 0, ux: 0 }
147            }
148        }
149    };
150}
151
152impl_row_handler_nv10_ar30!(RowHandlerBalanced);
153
154fn yuv_nv_p10_to_image_impl_d<
155    const AR30_LAYOUT: usize,
156    const AR30_STORE: usize,
157    const NV_ORDER: u8,
158    const SAMPLING: u8,
159    const ENDIANNESS: u8,
160    const BYTES_POSITION: u8,
161    const PRECISION: i32,
162    const BACK_SHIFT: i32,
163    const BIT_DEPTH: usize,
164>(
165    image: &YuvBiPlanarImage<u16>,
166    ar30: &mut [u8],
167    ar30_stride: u32,
168    range: YuvRange,
169    matrix: YuvStandardMatrix,
170    row_handler: impl RowDBiPlanarInversionHandler<u16, u8, i32> + Send + Sync,
171) -> Result<(), YuvError> {
172    let ar30_layout: Rgb30 = AR30_LAYOUT.into();
173    const CN: usize = 4;
174    let uv_order: YuvNVOrder = NV_ORDER.into();
175    let chroma_subsampling: YuvChromaSubsampling = SAMPLING.into();
176
177    image.check_constraints(chroma_subsampling)?;
178    check_rgba_destination(ar30, ar30_stride, image.width, image.height, CN)?;
179
180    let chroma_range = get_yuv_range(BIT_DEPTH as u32, range);
181    let kr_kb = matrix.get_kr_kb();
182    let i_transform = search_inverse_transform(
183        PRECISION,
184        BIT_DEPTH as u32,
185        range,
186        matrix,
187        chroma_range,
188        kr_kb,
189    );
190    let cr_coef = i_transform.cr_coef;
191    let cb_coef = i_transform.cb_coef;
192    let y_coef = i_transform.y_coef;
193    let g_coef_1 = i_transform.g_coeff_1;
194    let g_coef_2 = i_transform.g_coeff_2;
195
196    let bias_y = chroma_range.bias_y as i32;
197    let bias_uv = chroma_range.bias_uv as i32;
198
199    let msb_shift = 16 - BIT_DEPTH as i32;
200    let width = image.width;
201
202    let process_halved_chroma_row = |y_src: &[u16], uv_src: &[u16], rgba: &mut [u8]| {
203        let processed =
204            row_handler.handle_row(y_src, uv_src, rgba, image.width, chroma_range, &i_transform);
205        if processed.cx != image.width as usize {
206            for ((rgba, y_src), uv_src) in rgba
207                .chunks_exact_mut(CN * 2)
208                .zip(y_src.chunks_exact(2))
209                .zip(uv_src.chunks_exact(2))
210                .skip(processed.cx / 2)
211            {
212                let y_vl0 = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[0], msb_shift) as i32;
213                let mut cb_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
214                    uv_src[uv_order.get_u_position()],
215                    msb_shift,
216                ) as i32;
217                let mut cr_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
218                    uv_src[uv_order.get_v_position()],
219                    msb_shift,
220                ) as i32;
221
222                let y_value0: i32 = (y_vl0 - bias_y) * y_coef;
223
224                cb_value -= bias_uv;
225                cr_value -= bias_uv;
226
227                let r_p0 = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value0 + cr_coef * cr_value);
228                let b_p0 = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value0 + cb_coef * cb_value);
229                let g_p0 = qrshr::<BACK_SHIFT, BIT_DEPTH>(
230                    y_value0 - g_coef_1 * cr_value - g_coef_2 * cb_value,
231                );
232
233                let pixel0 = ar30_layout
234                    .pack::<AR30_STORE>(r_p0, g_p0, b_p0)
235                    .to_ne_bytes();
236                rgba[0] = pixel0[0];
237                rgba[1] = pixel0[1];
238                rgba[2] = pixel0[2];
239                rgba[3] = pixel0[3];
240
241                let y_vl1 = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[1], msb_shift) as i32;
242
243                let y_value1: i32 = (y_vl1 - bias_y) * y_coef;
244
245                let r_p1 = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value1 + cr_coef * cr_value);
246                let b_p1 = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value1 + cb_coef * cb_value);
247                let g_p1 = qrshr::<BACK_SHIFT, BIT_DEPTH>(
248                    y_value1 - g_coef_1 * cr_value - g_coef_2 * cb_value,
249                );
250
251                let pixel1 = ar30_layout
252                    .pack::<AR30_STORE>(r_p1, g_p1, b_p1)
253                    .to_ne_bytes();
254                rgba[4] = pixel1[0];
255                rgba[5] = pixel1[1];
256                rgba[6] = pixel1[2];
257                rgba[7] = pixel1[3];
258            }
259
260            if width & 1 != 0 {
261                let rgba = rgba.chunks_exact_mut(CN * 2).into_remainder();
262                let rgba = &mut rgba[0..CN];
263                let uv_src = uv_src.chunks_exact(2).last().unwrap();
264                let y_src = y_src.chunks_exact(2).remainder();
265
266                let y_vl0 = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[0], msb_shift) as i32;
267                let y_value0: i32 = (y_vl0 - bias_y) * y_coef;
268                let mut cb_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
269                    uv_src[uv_order.get_u_position()],
270                    msb_shift,
271                ) as i32;
272                let mut cr_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
273                    uv_src[uv_order.get_v_position()],
274                    msb_shift,
275                ) as i32;
276
277                cb_value -= bias_uv;
278                cr_value -= bias_uv;
279
280                let r_p0 = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value0 + cr_coef * cr_value);
281                let b_p0 = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value0 + cb_coef * cb_value);
282                let g_p0 = qrshr::<BACK_SHIFT, BIT_DEPTH>(
283                    y_value0 - g_coef_1 * cr_value - g_coef_2 * cb_value,
284                );
285
286                let pixel0 = ar30_layout
287                    .pack::<AR30_STORE>(r_p0, g_p0, b_p0)
288                    .to_ne_bytes();
289                rgba[0] = pixel0[0];
290                rgba[1] = pixel0[1];
291                rgba[2] = pixel0[2];
292                rgba[3] = pixel0[3];
293            }
294        }
295    };
296
297    let y_stride = image.y_stride;
298    let uv_stride = image.uv_stride;
299    let y_plane = image.y_plane;
300    let uv_plane = image.uv_plane;
301
302    if chroma_subsampling == YuvChromaSubsampling::Yuv444 {
303        let iter;
304        #[cfg(feature = "rayon")]
305        {
306            iter = y_plane
307                .par_chunks_exact(y_stride as usize)
308                .zip(uv_plane.par_chunks_exact(uv_stride as usize))
309                .zip(ar30.par_chunks_exact_mut(ar30_stride as usize));
310        }
311        #[cfg(not(feature = "rayon"))]
312        {
313            iter = y_plane
314                .chunks_exact(y_stride as usize)
315                .zip(uv_plane.chunks_exact(uv_stride as usize))
316                .zip(ar30.chunks_exact_mut(ar30_stride as usize));
317        }
318        iter.for_each(|((y_src, uv_src), rgba)| {
319            let y_src = &y_src[0..image.width as usize];
320            let processed = row_handler.handle_row(
321                y_src,
322                uv_src,
323                rgba,
324                image.width,
325                chroma_range,
326                &i_transform,
327            );
328            if processed.cx != image.width as usize {
329                for ((rgba, &y_src), uv_src) in rgba
330                    .chunks_exact_mut(CN)
331                    .zip(y_src.iter())
332                    .zip(uv_src.chunks_exact(2))
333                    .skip(processed.cx)
334                {
335                    let y_vl = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src, msb_shift) as i32;
336                    let mut cb_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
337                        uv_src[uv_order.get_u_position()],
338                        msb_shift,
339                    ) as i32;
340                    let mut cr_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
341                        uv_src[uv_order.get_v_position()],
342                        msb_shift,
343                    ) as i32;
344
345                    let y_value: i32 = (y_vl - bias_y) * y_coef;
346
347                    cb_value -= bias_uv;
348                    cr_value -= bias_uv;
349
350                    let r_p = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value + cr_coef * cr_value);
351                    let b_p = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value + cb_coef * cb_value);
352                    let g_p = qrshr::<BACK_SHIFT, BIT_DEPTH>(
353                        y_value - g_coef_1 * cr_value - g_coef_2 * cb_value,
354                    );
355
356                    let pixel0 = ar30_layout.pack::<AR30_STORE>(r_p, g_p, b_p).to_ne_bytes();
357                    rgba[0] = pixel0[0];
358                    rgba[1] = pixel0[1];
359                    rgba[2] = pixel0[2];
360                    rgba[3] = pixel0[3];
361                }
362            }
363        });
364    } else if chroma_subsampling == YuvChromaSubsampling::Yuv422 {
365        let iter;
366        #[cfg(feature = "rayon")]
367        {
368            iter = y_plane
369                .par_chunks_exact(y_stride as usize)
370                .zip(uv_plane.par_chunks_exact(uv_stride as usize))
371                .zip(ar30.par_chunks_exact_mut(ar30_stride as usize));
372        }
373        #[cfg(not(feature = "rayon"))]
374        {
375            iter = y_plane
376                .chunks_exact(y_stride as usize)
377                .zip(uv_plane.chunks_exact(uv_stride as usize))
378                .zip(ar30.chunks_exact_mut(ar30_stride as usize));
379        }
380        iter.for_each(|((y_src, uv_src), rgba)| {
381            process_halved_chroma_row(
382                &y_src[0..image.width as usize],
383                &uv_src[0..(image.width as usize).div_ceil(2) * 2],
384                &mut rgba[0..image.width as usize * CN],
385            );
386        });
387    } else if chroma_subsampling == YuvChromaSubsampling::Yuv420 {
388        let iter;
389        #[cfg(feature = "rayon")]
390        {
391            iter = y_plane
392                .par_chunks_exact(y_stride as usize * 2)
393                .zip(uv_plane.par_chunks_exact(uv_stride as usize))
394                .zip(ar30.par_chunks_exact_mut(ar30_stride as usize * 2));
395        }
396        #[cfg(not(feature = "rayon"))]
397        {
398            iter = y_plane
399                .chunks_exact(y_stride as usize * 2)
400                .zip(uv_plane.chunks_exact(uv_stride as usize))
401                .zip(ar30.chunks_exact_mut(ar30_stride as usize * 2));
402        }
403        iter.for_each(|((y_src, uv_src), rgba)| {
404            for (y_src, rgba) in y_src
405                .chunks_exact(y_stride as usize)
406                .zip(rgba.chunks_exact_mut(ar30_stride as usize))
407            {
408                process_halved_chroma_row(
409                    &y_src[0..image.width as usize],
410                    &uv_src[0..(image.width as usize).div_ceil(2) * 2],
411                    &mut rgba[0..image.width as usize * CN],
412                );
413            }
414        });
415        if image.height & 1 != 0 {
416            let y_src = y_plane.chunks_exact(y_stride as usize * 2).remainder();
417            let uv_src = uv_plane.chunks_exact(uv_stride as usize).last().unwrap();
418            let rgba = ar30
419                .chunks_exact_mut(ar30_stride as usize * 2)
420                .into_remainder();
421            process_halved_chroma_row(
422                &y_src[0..image.width as usize],
423                &uv_src[0..(image.width as usize).div_ceil(2) * 2],
424                &mut rgba[0..image.width as usize * CN],
425            );
426        }
427    } else {
428        unreachable!();
429    }
430
431    Ok(())
432}
433
434#[inline]
435fn yuv_nv_p10_to_image_impl<
436    const AR30_LAYOUT: usize,
437    const NV_ORDER: u8,
438    const SAMPLING: u8,
439    const ENDIANNESS: u8,
440    const BYTES_POSITION: u8,
441    const BIT_DEPTH: usize,
442    const BACK_SHIFT: i32,
443>(
444    image: &YuvBiPlanarImage<u16>,
445    bgra: &mut [u8],
446    bgra_stride: u32,
447    order: Rgb30ByteOrder,
448    range: YuvRange,
449    matrix: YuvStandardMatrix,
450) -> Result<(), YuvError> {
451    match order {
452        Rgb30ByteOrder::Host => yuv_nv_p10_to_image_impl_d::<
453            AR30_LAYOUT,
454            { Rgb30ByteOrder::Host as usize },
455            NV_ORDER,
456            SAMPLING,
457            ENDIANNESS,
458            BYTES_POSITION,
459            14,
460            BACK_SHIFT,
461            BIT_DEPTH,
462        >(
463            image,
464            bgra,
465            bgra_stride,
466            range,
467            matrix,
468            RowHandlerBalanced::<
469                AR30_LAYOUT,
470                { Rgb30ByteOrder::Host as usize },
471                NV_ORDER,
472                SAMPLING,
473                ENDIANNESS,
474                BYTES_POSITION,
475                14,
476                BIT_DEPTH,
477            >::default(),
478        ),
479        Rgb30ByteOrder::Network => yuv_nv_p10_to_image_impl_d::<
480            AR30_LAYOUT,
481            { Rgb30ByteOrder::Network as usize },
482            NV_ORDER,
483            SAMPLING,
484            ENDIANNESS,
485            BYTES_POSITION,
486            14,
487            BACK_SHIFT,
488            BIT_DEPTH,
489        >(
490            image,
491            bgra,
492            bgra_stride,
493            range,
494            matrix,
495            RowHandlerBalanced::<
496                AR30_LAYOUT,
497                { Rgb30ByteOrder::Network as usize },
498                NV_ORDER,
499                SAMPLING,
500                ENDIANNESS,
501                BYTES_POSITION,
502                14,
503                BIT_DEPTH,
504            >::default(),
505        ),
506    }
507}
508
// Generates a public conversion entry point.
//
// * `$method` - name of the generated function.
// * `$name` / `$ar_name` - source / destination format names, used in the docs only.
// * `$px_fmt` - `Rgb30` layout of the destination.
// * `$chroma_subsampling` - chroma subsampling of the source.
// * `$bit_depth` - bit depth of the source samples.
// * `$back_shift` - shift forwarded as the `BACK_SHIFT` const argument.
//
// The generated function always reads the source as UV-ordered, little-endian
// samples stored in the most significant bits.
macro_rules! define_cnv {
    ($method: ident, $name: expr, $ar_name:expr, $px_fmt: expr, $chroma_subsampling: expr, $bit_depth: expr, $back_shift: expr) => {
        #[doc = concat!("
Converts ", $name, " to ", $ar_name," format.
This function takes ", $name, " data with ", stringify!($bit_depth),"-bit precision
and converts it to ", $ar_name," format.

# Arguments

* `bi_planar_image` - Source Bi-Planar ", $bit_depth,"-bit image.
* `dst` - A mutable slice to store the converted ", $ar_name, " data.
* `dst_stride` - The stride for the ", $ar_name, " image data.
* `byte_order` - see [Rgb30ByteOrder] for more info.
* `range` - range of YUV, see [YuvRange] for more info.
* `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).

# Errors

Returns a [YuvError] if the source planes or the destination ", $ar_name," buffer are not valid
for the specified width, height, and strides.")]
        pub fn $method(
            bi_planar_image: &YuvBiPlanarImage<u16>,
            dst: &mut [u8],
            dst_stride: u32,
            byte_order: Rgb30ByteOrder,
            range: YuvRange,
            matrix: YuvStandardMatrix,
        ) -> Result<(), YuvError> {
            yuv_nv_p10_to_image_impl::<
                { $px_fmt as usize },
                { YuvNVOrder::UV as u8 },
                { $chroma_subsampling as u8 },
                { YuvEndianness::LittleEndian as u8 },
                { YuvBytesPacking::MostSignificantBytes as u8 },
                $bit_depth,
                $back_shift,
            >(bi_planar_image, dst, dst_stride, byte_order, range, matrix)
        }
    };
}
553
// 10-bit sources (P010 = 4:2:0, P210 = 4:2:2), back-shift 14.
define_cnv!(
    p010_to_ar30,
    "P010",
    "AR30",
    Rgb30::Ar30,
    YuvChromaSubsampling::Yuv420,
    10,
    14
);
define_cnv!(
    p010_to_ra30,
    "P010",
    "RA30",
    Rgb30::Ra30,
    YuvChromaSubsampling::Yuv420,
    10,
    14
);
define_cnv!(
    p210_to_ar30,
    "P210",
    "AR30",
    Rgb30::Ar30,
    YuvChromaSubsampling::Yuv422,
    10,
    14
);
define_cnv!(
    p210_to_ra30,
    "P210",
    "RA30",
    Rgb30::Ra30,
    YuvChromaSubsampling::Yuv422,
    10,
    14
);

// 12-bit sources (P012 = 4:2:0, P212 = 4:2:2), back-shift 16.
// NOTE(review): presumably the two extra bits of shift narrow the 12-bit result
// to the 10-bit AR30/RA30 channels — confirm against `qrshr`.
define_cnv!(
    p012_to_ar30,
    "P012",
    "AR30",
    Rgb30::Ar30,
    YuvChromaSubsampling::Yuv420,
    12,
    16
);
define_cnv!(
    p012_to_ra30,
    "P012",
    "RA30",
    Rgb30::Ra30,
    YuvChromaSubsampling::Yuv420,
    12,
    16
);
define_cnv!(
    p212_to_ar30,
    "P212",
    "AR30",
    Rgb30::Ar30,
    YuvChromaSubsampling::Yuv422,
    12,
    16
);
define_cnv!(
    p212_to_ra30,
    "P212",
    "RA30",
    Rgb30::Ra30,
    YuvChromaSubsampling::Yuv422,
    12,
    16
);