use crate::internals::{ProcessedOffset, RowDBiPlanarInversionHandler};
use crate::numerics::{qrshr, to_ne};
use crate::yuv_error::check_rgba_destination;
use crate::yuv_support::*;
use crate::{YuvBiPlanarImage, YuvError};
#[cfg(feature = "rayon")]
use rayon::iter::{IndexedParallelIterator, ParallelIterator};
#[cfg(feature = "rayon")]
use rayon::prelude::{ParallelSlice, ParallelSliceMut};

type RowHandlerFn = unsafe fn(
    y_plane: &[u16],
    uv_plane: &[u16],
    bgra: &mut [u8],
    width: u32,
    range: &YuvChromaRange,
    transform: &CbCrInverseTransform<i32>,
    start_cx: usize,
    start_ux: usize,
) -> ProcessedOffset;

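// Dispatch wrappers around an optional platform-specific row kernel; when
// `handler` is `None`, the scalar paths below convert the whole row.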
struct RowHandlerBalanced<
    const DESTINATION_CHANNELS: u8,
    const NV_ORDER: u8,
    const SAMPLING: u8,
    const ENDIANNESS: u8,
    const BYTES_POSITION: u8,
    const PRECISION: i32,
    const BIT_DEPTH: usize,
> {
    handler: Option<RowHandlerFn>,
}

#[cfg(feature = "professional_mode")]
struct RowHandlerProfessional<
    const DESTINATION_CHANNELS: u8,
    const NV_ORDER: u8,
    const SAMPLING: u8,
    const ENDIANNESS: u8,
    const BYTES_POSITION: u8,
    const PRECISION: i32,
    const BIT_DEPTH: usize,
> {
    handler: Option<RowHandlerFn>,
}

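// The `Default` impls bind a SIMD kernel where one is known to apply: on
// aarch64 builds with NEON enabled they wire up the 10-bit NV12 row kernels
// (PRECISION 13 for balanced, 14 for professional); otherwise the handler
// stays empty and conversion falls back to the scalar code.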
impl<
        const DESTINATION_CHANNELS: u8,
        const NV_ORDER: u8,
        const SAMPLING: u8,
        const ENDIANNESS: u8,
        const BYTES_POSITION: u8,
        const PRECISION: i32,
        const BIT_DEPTH: usize,
    > Default
    for RowHandlerBalanced<
        DESTINATION_CHANNELS,
        NV_ORDER,
        SAMPLING,
        ENDIANNESS,
        BYTES_POSITION,
        PRECISION,
        BIT_DEPTH,
    >
{
    fn default() -> Self {
        if PRECISION == 13 {
            assert_eq!(PRECISION, 13);
            #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
            {
                use crate::neon::neon_yuv_nv12_p10_to_rgba_row;
                return Self {
                    handler: Some(
                        neon_yuv_nv12_p10_to_rgba_row::<
                            DESTINATION_CHANNELS,
                            NV_ORDER,
                            SAMPLING,
                            ENDIANNESS,
                            BYTES_POSITION,
                            PRECISION,
                            BIT_DEPTH,
                        >,
                    ),
                };
            }
        }
        Self { handler: None }
    }
}

#[cfg(feature = "professional_mode")]
impl<
        const DESTINATION_CHANNELS: u8,
        const NV_ORDER: u8,
        const SAMPLING: u8,
        const ENDIANNESS: u8,
        const BYTES_POSITION: u8,
        const PRECISION: i32,
        const BIT_DEPTH: usize,
    > Default
    for RowHandlerProfessional<
        DESTINATION_CHANNELS,
        NV_ORDER,
        SAMPLING,
        ENDIANNESS,
        BYTES_POSITION,
        PRECISION,
        BIT_DEPTH,
    >
{
    fn default() -> Self {
        if PRECISION == 14 {
            assert_eq!(PRECISION, 14);
            #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
            {
                use crate::neon::neon_yuv_nv12_p10_to_rgba_row_prof;
                return Self {
                    handler: Some(
                        neon_yuv_nv12_p10_to_rgba_row_prof::<
                            DESTINATION_CHANNELS,
                            NV_ORDER,
                            SAMPLING,
                            ENDIANNESS,
                            BYTES_POSITION,
                            BIT_DEPTH,
                        >,
                    ),
                };
            }
        }
        Self { handler: None }
    }
}

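// Trait plumbing shared by both wrappers: a present kernel converts as much of
// the row as it can and reports how far it got; an absent kernel reports zero
// offsets so the caller's scalar loop converts everything.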
macro_rules! impl_row_handler {
    ($struct_name:ident) => {
        impl<
                const DESTINATION_CHANNELS: u8,
                const NV_ORDER: u8,
                const SAMPLING: u8,
                const ENDIANNESS: u8,
                const BYTES_POSITION: u8,
                const PRECISION: i32,
                const BIT_DEPTH: usize,
            > RowDBiPlanarInversionHandler<u16, u8, i32>
            for $struct_name<
                DESTINATION_CHANNELS,
                NV_ORDER,
                SAMPLING,
                ENDIANNESS,
                BYTES_POSITION,
                PRECISION,
                BIT_DEPTH,
            >
        {
            fn handle_row(
                &self,
                y_plane: &[u16],
                uv_plane: &[u16],
                rgba: &mut [u8],
                width: u32,
                chroma: YuvChromaRange,
                transform: &CbCrInverseTransform<i32>,
            ) -> ProcessedOffset {
                if let Some(handler) = self.handler {
                    unsafe {
                        return handler(y_plane, uv_plane, rgba, width, &chroma, transform, 0, 0);
                    }
                }
                ProcessedOffset { cx: 0, ux: 0 }
            }
        }
    };
}

impl_row_handler!(RowHandlerBalanced);
#[cfg(feature = "professional_mode")]
impl_row_handler!(RowHandlerProfessional);

fn yuv_nv_p10_to_image_impl_d<
    const DESTINATION_CHANNELS: u8,
    const NV_ORDER: u8,
    const SAMPLING: u8,
    const ENDIANNESS: u8,
    const BYTES_POSITION: u8,
    const PRECISION: i32,
    const V_R_SHR: i32,
>(
    image: &YuvBiPlanarImage<u16>,
    bgra: &mut [u8],
    bgra_stride: u32,
    range: YuvRange,
    matrix: YuvStandardMatrix,
    row_handler: impl RowDBiPlanarInversionHandler<u16, u8, i32> + Send + Sync,
) -> Result<(), YuvError> {
    let dst_chans: YuvSourceChannels = DESTINATION_CHANNELS.into();
    let channels = dst_chans.get_channels_count();
    let uv_order: YuvNVOrder = NV_ORDER.into();
    let chroma_subsampling: YuvChromaSubsampling = SAMPLING.into();

    const BIT_DEPTH: usize = 10;

    image.check_constraints(chroma_subsampling)?;
    check_rgba_destination(bgra, bgra_stride, image.width, image.height, channels)?;

    let chroma_range = get_yuv_range(BIT_DEPTH as u32, range);
    let kr_kb = matrix.get_kr_kb();
    let i_transform = search_inverse_transform(
        PRECISION,
        BIT_DEPTH as u32,
        range,
        matrix,
        chroma_range,
        kr_kb,
    );
    let cr_coef = i_transform.cr_coef;
    let cb_coef = i_transform.cb_coef;
    let y_coef = i_transform.y_coef;
    let g_coef_1 = i_transform.g_coeff_1;
    let g_coef_2 = i_transform.g_coeff_2;

    let bias_y = chroma_range.bias_y as i32;
    let bias_uv = chroma_range.bias_uv as i32;

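    // For the most-significant-bytes packing used by the P010 family, the 10
    // significant bits sit in the upper part of each 16-bit word, so `to_ne`
    // shifts samples down by `16 - BIT_DEPTH` before arithmetic.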
    let msb_shift = 16 - BIT_DEPTH as i32;
    let width = image.width;

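    // Scalar fallback for rows with horizontally halved chroma (4:2:0/4:2:2):
    // each interleaved UV pair is shared by two neighbouring pixels, so the loop
    // writes two RGBA pixels per iteration and finishes a trailing odd pixel
    // separately; anything the SIMD handler already converted is skipped.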
    let process_halved_chroma_row = |y_src: &[u16], uv_src: &[u16], rgba: &mut [u8]| {
        let processed =
            row_handler.handle_row(y_src, uv_src, rgba, image.width, chroma_range, &i_transform);
        if processed.cx != image.width as usize {
            for ((rgba, y_src), uv_src) in rgba
                .chunks_exact_mut(channels * 2)
                .zip(y_src.chunks_exact(2))
                .zip(uv_src.chunks_exact(2))
                .skip(processed.cx / 2)
            {
                let y_vl0 = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[0], msb_shift) as i32;
                let mut cb_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
                    uv_src[uv_order.get_u_position()],
                    msb_shift,
                ) as i32;
                let mut cr_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
                    uv_src[uv_order.get_v_position()],
                    msb_shift,
                ) as i32;

                let y_value0: i32 = (y_vl0 - bias_y) * y_coef;

                cb_value -= bias_uv;
                cr_value -= bias_uv;

                let r_p0 = qrshr::<V_R_SHR, 8>(y_value0 + cr_coef * cr_value);
                let b_p0 = qrshr::<V_R_SHR, 8>(y_value0 + cb_coef * cb_value);
                let g_p0 =
                    qrshr::<V_R_SHR, 8>(y_value0 - g_coef_1 * cr_value - g_coef_2 * cb_value);

                rgba[dst_chans.get_b_channel_offset()] = b_p0 as u8;
                rgba[dst_chans.get_g_channel_offset()] = g_p0 as u8;
                rgba[dst_chans.get_r_channel_offset()] = r_p0 as u8;

                if dst_chans.has_alpha() {
                    rgba[dst_chans.get_a_channel_offset()] = 255u8;
                }

                let y_vl1 = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[1], msb_shift) as i32;

                let y_value1: i32 = (y_vl1 - bias_y) * y_coef;

                let r_p1 = qrshr::<V_R_SHR, 8>(y_value1 + cr_coef * cr_value);
                let b_p1 = qrshr::<V_R_SHR, 8>(y_value1 + cb_coef * cb_value);
                let g_p1 =
                    qrshr::<V_R_SHR, 8>(y_value1 - g_coef_1 * cr_value - g_coef_2 * cb_value);

                rgba[channels + dst_chans.get_b_channel_offset()] = b_p1 as u8;
                rgba[channels + dst_chans.get_g_channel_offset()] = g_p1 as u8;
                rgba[channels + dst_chans.get_r_channel_offset()] = r_p1 as u8;

                if dst_chans.has_alpha() {
                    rgba[channels + dst_chans.get_a_channel_offset()] = 255;
                }
            }

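            // Odd width: the last pixel has no partner, so it reuses the final UV pair.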
            if width & 1 != 0 {
                let rgba = rgba.chunks_exact_mut(channels * 2).into_remainder();
                let rgba = &mut rgba[0..channels];
                let uv_src = uv_src.chunks_exact(2).last().unwrap();
                let y_src = y_src.chunks_exact(2).remainder();

                let y_vl0 = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[0], msb_shift) as i32;
                let y_value0: i32 = (y_vl0 - bias_y) * y_coef;
                let mut cb_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
                    uv_src[uv_order.get_u_position()],
                    msb_shift,
                ) as i32;
                let mut cr_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
                    uv_src[uv_order.get_v_position()],
                    msb_shift,
                ) as i32;

                cb_value -= bias_uv;
                cr_value -= bias_uv;

                let r_p0 = qrshr::<V_R_SHR, 8>(y_value0 + cr_coef * cr_value);
                let b_p0 = qrshr::<V_R_SHR, 8>(y_value0 + cb_coef * cb_value);
                let g_p0 =
                    qrshr::<V_R_SHR, 8>(y_value0 - g_coef_1 * cr_value - g_coef_2 * cb_value);

                rgba[dst_chans.get_b_channel_offset()] = b_p0 as u8;
                rgba[dst_chans.get_g_channel_offset()] = g_p0 as u8;
                rgba[dst_chans.get_r_channel_offset()] = r_p0 as u8;

                if dst_chans.has_alpha() {
                    rgba[dst_chans.get_a_channel_offset()] = 255u8;
                }
            }
        }
    };

    let y_stride = image.y_stride;
    let uv_stride = image.uv_stride;
    let y_plane = image.y_plane;
    let uv_plane = image.uv_plane;

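    // Subsampling dispatch: 4:4:4 gives every pixel its own UV pair, 4:2:2 shares
    // a pair across two horizontal pixels, and 4:2:0 additionally shares one UV
    // row between two luma rows (with an odd trailing row handled afterwards).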
    if chroma_subsampling == YuvChromaSubsampling::Yuv444 {
        let iter;
        #[cfg(feature = "rayon")]
        {
            iter = y_plane
                .par_chunks_exact(y_stride as usize)
                .zip(uv_plane.par_chunks_exact(uv_stride as usize))
                .zip(bgra.par_chunks_exact_mut(bgra_stride as usize));
        }
        #[cfg(not(feature = "rayon"))]
        {
            iter = y_plane
                .chunks_exact(y_stride as usize)
                .zip(uv_plane.chunks_exact(uv_stride as usize))
                .zip(bgra.chunks_exact_mut(bgra_stride as usize));
        }
        iter.for_each(|((y_src, uv_src), rgba)| {
            let y_src = &y_src[0..image.width as usize];
            let processed = row_handler.handle_row(
                y_src,
                uv_src,
                rgba,
                image.width,
                chroma_range,
                &i_transform,
            );
            if processed.cx != image.width as usize {
                for ((rgba, &y_src), uv_src) in rgba
                    .chunks_exact_mut(channels)
                    .zip(y_src.iter())
                    .zip(uv_src.chunks_exact(2))
                    .skip(processed.cx)
                {
                    let y_vl = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src, msb_shift) as i32;
                    let mut cb_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
                        uv_src[uv_order.get_u_position()],
                        msb_shift,
                    ) as i32;
                    let mut cr_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
                        uv_src[uv_order.get_v_position()],
                        msb_shift,
                    ) as i32;

                    let y_value: i32 = (y_vl - bias_y) * y_coef;

                    cb_value -= bias_uv;
                    cr_value -= bias_uv;

                    let r_p16 = qrshr::<V_R_SHR, 8>(y_value + cr_coef * cr_value);
                    let b_p16 = qrshr::<V_R_SHR, 8>(y_value + cb_coef * cb_value);
                    let g_p16 =
                        qrshr::<V_R_SHR, 8>(y_value - g_coef_1 * cr_value - g_coef_2 * cb_value);

                    rgba[dst_chans.get_b_channel_offset()] = b_p16 as u8;
                    rgba[dst_chans.get_g_channel_offset()] = g_p16 as u8;
                    rgba[dst_chans.get_r_channel_offset()] = r_p16 as u8;

                    if dst_chans.has_alpha() {
                        rgba[dst_chans.get_a_channel_offset()] = 255u8;
                    }
                }
            }
        });
    } else if chroma_subsampling == YuvChromaSubsampling::Yuv422 {
        let iter;
        #[cfg(feature = "rayon")]
        {
            iter = y_plane
                .par_chunks_exact(y_stride as usize)
                .zip(uv_plane.par_chunks_exact(uv_stride as usize))
                .zip(bgra.par_chunks_exact_mut(bgra_stride as usize));
        }
        #[cfg(not(feature = "rayon"))]
        {
            iter = y_plane
                .chunks_exact(y_stride as usize)
                .zip(uv_plane.chunks_exact(uv_stride as usize))
                .zip(bgra.chunks_exact_mut(bgra_stride as usize));
        }
        iter.for_each(|((y_src, uv_src), rgba)| {
            process_halved_chroma_row(
                &y_src[0..image.width as usize],
                &uv_src[0..(image.width as usize).div_ceil(2) * 2],
                &mut rgba[0..image.width as usize * channels],
            );
        });
    } else if chroma_subsampling == YuvChromaSubsampling::Yuv420 {
        let iter;
        #[cfg(feature = "rayon")]
        {
            iter = y_plane
                .par_chunks_exact(y_stride as usize * 2)
                .zip(uv_plane.par_chunks_exact(uv_stride as usize))
                .zip(bgra.par_chunks_exact_mut(bgra_stride as usize * 2));
        }
        #[cfg(not(feature = "rayon"))]
        {
            iter = y_plane
                .chunks_exact(y_stride as usize * 2)
                .zip(uv_plane.chunks_exact(uv_stride as usize))
                .zip(bgra.chunks_exact_mut(bgra_stride as usize * 2));
        }
        iter.for_each(|((y_src, uv_src), rgba)| {
            for (y_src, rgba) in y_src
                .chunks_exact(y_stride as usize)
                .zip(rgba.chunks_exact_mut(bgra_stride as usize))
            {
                process_halved_chroma_row(
                    &y_src[0..image.width as usize],
                    &uv_src[0..(image.width as usize).div_ceil(2) * 2],
                    &mut rgba[0..image.width as usize * channels],
                );
            }
        });
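        // Odd height: convert the remaining luma row against the final UV row.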
        if image.height & 1 != 0 {
            let y_src = y_plane.chunks_exact(y_stride as usize * 2).remainder();
            let uv_src = uv_plane.chunks_exact(uv_stride as usize).last().unwrap();
            let rgba = bgra
                .chunks_exact_mut(bgra_stride as usize * 2)
                .into_remainder();
            process_halved_chroma_row(
                &y_src[0..image.width as usize],
                &uv_src[0..(image.width as usize).div_ceil(2) * 2],
                &mut rgba[0..image.width as usize * channels],
            );
        }
    } else {
        unreachable!();
    }

    Ok(())
}

#[inline]
fn yuv_nv_p10_to_image_impl<
    const DESTINATION_CHANNELS: u8,
    const NV_ORDER: u8,
    const SAMPLING: u8,
    const ENDIANNESS: u8,
    const BYTES_POSITION: u8,
>(
    image: &YuvBiPlanarImage<u16>,
    bgra: &mut [u8],
    bgra_stride: u32,
    range: YuvRange,
    matrix: YuvStandardMatrix,
    mode: YuvConversionMode,
) -> Result<(), YuvError> {
    match mode {
        #[cfg(feature = "fast_mode")]
        YuvConversionMode::Fast => yuv_nv_p10_to_image_impl_d::<
            DESTINATION_CHANNELS,
            NV_ORDER,
            SAMPLING,
            ENDIANNESS,
            BYTES_POSITION,
            13,
            15,
        >(
            image,
            bgra,
            bgra_stride,
            range,
            matrix,
            RowHandlerBalanced::<
                DESTINATION_CHANNELS,
                NV_ORDER,
                SAMPLING,
                ENDIANNESS,
                BYTES_POSITION,
                13,
                10,
            >::default(),
        ),
        YuvConversionMode::Balanced => yuv_nv_p10_to_image_impl_d::<
            DESTINATION_CHANNELS,
            NV_ORDER,
            SAMPLING,
            ENDIANNESS,
            BYTES_POSITION,
            13,
            15,
        >(
            image,
            bgra,
            bgra_stride,
            range,
            matrix,
            RowHandlerBalanced::<
                DESTINATION_CHANNELS,
                NV_ORDER,
                SAMPLING,
                ENDIANNESS,
                BYTES_POSITION,
                13,
                10,
            >::default(),
        ),
        #[cfg(feature = "professional_mode")]
        YuvConversionMode::Professional => yuv_nv_p10_to_image_impl_d::<
            DESTINATION_CHANNELS,
            NV_ORDER,
            SAMPLING,
            ENDIANNESS,
            BYTES_POSITION,
            14,
            16,
        >(
            image,
            bgra,
            bgra_stride,
            range,
            matrix,
            RowHandlerProfessional::<
                DESTINATION_CHANNELS,
                NV_ORDER,
                SAMPLING,
                ENDIANNESS,
                BYTES_POSITION,
                14,
                10,
            >::default(),
        ),
    }
}

macro_rules! d_cnv {
    ($method: ident, $px_fmt: expr, $subsampling: expr, $yuv_name: expr, $px_name: expr, $bit_precision: expr) => {
        #[doc = concat!("Convert ", $yuv_name," format to ", $px_name," format.

This function takes ", $yuv_name," data with ", stringify!($bit_precision),"-bit precision
and converts it to ", $px_name," format with 8-bit precision.

# Arguments

* `bi_planar_image` - Source ", stringify!($bit_precision),"-bit bi-planar image.
* `rgba` - A mutable slice to store the converted ", $px_name," data.
* `rgba_stride` - The stride (components per row) for the ", $px_name," image data.
* `range` - The YUV range, see [YuvRange] for more info.
* `matrix` - The YUV standard matrix (BT.601, BT.709, BT.2020 or other).
* `mode` - See [YuvConversionMode] for more info.

# Panics

This function panics if the lengths of the planes or the output ", $px_name," data are not valid
for the specified width, height, and strides, or if an invalid YUV range or matrix is provided.")]
        pub fn $method(
            bi_planar_image: &YuvBiPlanarImage<u16>,
            rgba: &mut [u8],
            rgba_stride: u32,
            range: YuvRange,
            matrix: YuvStandardMatrix,
            mode: YuvConversionMode,
        ) -> Result<(), YuvError> {
            let dispatcher = yuv_nv_p10_to_image_impl::<
                { $px_fmt as u8 },
                { YuvNVOrder::UV as u8 },
                { $subsampling as u8 },
                { YuvEndianness::LittleEndian as u8 },
                { YuvBytesPacking::MostSignificantBytes as u8 },
            >;
            dispatcher(bi_planar_image, rgba, rgba_stride, range, matrix, mode)
        }
    };
}

d_cnv!(
    p010_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv420,
    "P010",
    "RGBA",
    10
);
d_cnv!(
    p010_to_rgb,
    YuvSourceChannels::Rgb,
    YuvChromaSubsampling::Yuv420,
    "P010",
    "RGB",
    10
);
d_cnv!(
    p010_to_bgr,
    YuvSourceChannels::Bgr,
    YuvChromaSubsampling::Yuv420,
    "P010",
    "BGR",
    10
);
d_cnv!(
    p010_to_bgra,
    YuvSourceChannels::Bgra,
    YuvChromaSubsampling::Yuv420,
    "P010",
    "BGRA",
    10
);
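
// A minimal usage sketch for the generated converters, kept as a comment: the
// exact `YuvBiPlanarImage` construction, the `YuvRange::Limited` and
// `YuvStandardMatrix::Bt709` variant names, and the buffer sizes below are
// illustrative assumptions rather than something defined in this module.
//
// let width = 1920u32;
// let height = 1080u32;
// let y_plane = vec![0u16; (width * height) as usize];
// // Interleaved UV plane for 4:2:0: (width / 2) * (height / 2) pairs.
// let uv_plane = vec![0u16; (width * height / 2) as usize];
// let image = YuvBiPlanarImage {
//     y_plane: &y_plane,
//     y_stride: width,
//     uv_plane: &uv_plane,
//     uv_stride: width,
//     width,
//     height,
// };
// let mut rgba = vec![0u8; (width * height * 4) as usize];
// p010_to_rgba(
//     &image,
//     &mut rgba,
//     width * 4,
//     YuvRange::Limited,
//     YuvStandardMatrix::Bt709,
//     YuvConversionMode::Balanced,
// )?;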

d_cnv!(
    p210_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv422,
    "P210",
    "RGBA",
    10
);
d_cnv!(
    p210_to_rgb,
    YuvSourceChannels::Rgb,
    YuvChromaSubsampling::Yuv422,
    "P210",
    "RGB",
    10
);
d_cnv!(
    p210_to_bgr,
    YuvSourceChannels::Bgr,
    YuvChromaSubsampling::Yuv422,
    "P210",
    "BGR",
    10
);
d_cnv!(
    p210_to_bgra,
    YuvSourceChannels::Bgra,
    YuvChromaSubsampling::Yuv422,
    "P210",
    "BGRA",
    10
);

d_cnv!(
    p410_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv444,
    "P410",
    "RGBA",
    10
);
d_cnv!(
    p410_to_rgb,
    YuvSourceChannels::Rgb,
    YuvChromaSubsampling::Yuv444,
    "P410",
    "RGB",
    10
);
d_cnv!(
    p410_to_bgr,
    YuvSourceChannels::Bgr,
    YuvChromaSubsampling::Yuv444,
    "P410",
    "BGR",
    10
);
d_cnv!(
    p410_to_bgra,
    YuvSourceChannels::Bgra,
    YuvChromaSubsampling::Yuv444,
    "P410",
    "BGRA",
    10
);