1use crate::internals::{ProcessedOffset, RowDBiPlanarInversionHandler};
30use crate::numerics::{qrshr, to_ne};
31use crate::yuv_error::check_rgba_destination;
32use crate::yuv_support::*;
33use crate::{YuvBiPlanarImage, YuvError};
34#[cfg(feature = "rayon")]
35use rayon::iter::{IndexedParallelIterator, ParallelIterator};
36#[cfg(feature = "rayon")]
37use rayon::prelude::{ParallelSlice, ParallelSliceMut};
38
/// Signature of an accelerated row converter that `RowHandlerBalanced` can hold.
///
/// The function is `unsafe`: callers must uphold whatever requirements the concrete
/// routine places on its arguments. Those requirements are not visible in this file.
/// NOTE(review): presumably CPU-feature availability and slice lengths — confirm
/// against the implementation that gets installed.
type RowHandlerFn = unsafe fn(
    // Luma samples of the row.
    y_plane: &[u16],
    // Interleaved chroma samples of the row.
    uv_plane: &[u16],
    // Destination bytes for the row.
    bgra: &mut [u8],
    // Image width, passed through from the caller.
    width: u32,
    // Range description produced by `get_yuv_range`.
    range: &YuvChromaRange,
    // Fixed-point inverse transform coefficients.
    transform: &CbCrInverseTransform<i32>,
    // Starting offsets; `handle_row` in this file always passes 0 for both.
    start_cx: usize,
    start_ux: usize,
) -> ProcessedOffset;
49
/// Row handler that optionally forwards to an accelerated routine.
///
/// The const parameters mirror the ones of the conversion function so that the
/// matching monomorphised routine can be selected in `Default::default`.
struct RowHandlerBalanced<
    const AR30_LAYOUT: usize,
    const AR30_STORE: usize,
    const NV_ORDER: u8,
    const SAMPLING: u8,
    const ENDIANNESS: u8,
    const BYTES_POSITION: u8,
    const PRECISION: i32,
    const BIT_DEPTH: usize,
> {
    /// Accelerated row routine, or `None` when no such routine was selected.
    /// With `None`, `handle_row` reports zero processed pixels, so the caller's
    /// scalar loop converts the entire row.
    handler: Option<RowHandlerFn>,
}
62
63impl<
64 const AR30_LAYOUT: usize,
65 const AR30_STORE: usize,
66 const NV_ORDER: u8,
67 const SAMPLING: u8,
68 const ENDIANNESS: u8,
69 const BYTES_POSITION: u8,
70 const PRECISION: i32,
71 const BIT_DEPTH: usize,
72 > Default
73 for RowHandlerBalanced<
74 AR30_LAYOUT,
75 AR30_STORE,
76 NV_ORDER,
77 SAMPLING,
78 ENDIANNESS,
79 BYTES_POSITION,
80 PRECISION,
81 BIT_DEPTH,
82 >
83{
84 fn default() -> Self {
85 if PRECISION == 14 {
86 assert_eq!(PRECISION, 14);
87 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
88 {
89 use crate::neon::neon_yuv_nv12_p10_to_ar30_row;
90 return Self {
91 handler: Some(
92 neon_yuv_nv12_p10_to_ar30_row::<
93 NV_ORDER,
94 SAMPLING,
95 ENDIANNESS,
96 BYTES_POSITION,
97 AR30_LAYOUT,
98 AR30_STORE,
99 BIT_DEPTH,
100 >,
101 ),
102 };
103 }
104 }
105 Self { handler: None }
106 }
107}
108
109macro_rules! impl_row_handler_nv10_ar30 {
110 ($struct_name:ident) => {
111 impl<
112 const AR30_LAYOUT: usize,
113 const AR30_STORE: usize,
114 const NV_ORDER: u8,
115 const SAMPLING: u8,
116 const ENDIANNESS: u8,
117 const BYTES_POSITION: u8,
118 const PRECISION: i32,
119 const BIT_DEPTH: usize,
120 > RowDBiPlanarInversionHandler<u16, u8, i32>
121 for $struct_name<
122 AR30_LAYOUT,
123 AR30_STORE,
124 NV_ORDER,
125 SAMPLING,
126 ENDIANNESS,
127 BYTES_POSITION,
128 PRECISION,
129 BIT_DEPTH,
130 >
131 {
132 fn handle_row(
133 &self,
134 y_plane: &[u16],
135 uv_plane: &[u16],
136 rgba: &mut [u8],
137 width: u32,
138 chroma: YuvChromaRange,
139 transform: &CbCrInverseTransform<i32>,
140 ) -> ProcessedOffset {
141 if let Some(handler) = self.handler {
142 unsafe {
143 return handler(y_plane, uv_plane, rgba, width, &chroma, transform, 0, 0);
144 }
145 }
146 ProcessedOffset { cx: 0, ux: 0 }
147 }
148 }
149 };
150}
151
152impl_row_handler_nv10_ar30!(RowHandlerBalanced);
153
154fn yuv_nv_p10_to_image_impl_d<
155 const AR30_LAYOUT: usize,
156 const AR30_STORE: usize,
157 const NV_ORDER: u8,
158 const SAMPLING: u8,
159 const ENDIANNESS: u8,
160 const BYTES_POSITION: u8,
161 const PRECISION: i32,
162 const BACK_SHIFT: i32,
163 const BIT_DEPTH: usize,
164>(
165 image: &YuvBiPlanarImage<u16>,
166 ar30: &mut [u8],
167 ar30_stride: u32,
168 range: YuvRange,
169 matrix: YuvStandardMatrix,
170 row_handler: impl RowDBiPlanarInversionHandler<u16, u8, i32> + Send + Sync,
171) -> Result<(), YuvError> {
172 let ar30_layout: Rgb30 = AR30_LAYOUT.into();
173 const CN: usize = 4;
174 let uv_order: YuvNVOrder = NV_ORDER.into();
175 let chroma_subsampling: YuvChromaSubsampling = SAMPLING.into();
176
177 image.check_constraints(chroma_subsampling)?;
178 check_rgba_destination(ar30, ar30_stride, image.width, image.height, CN)?;
179
180 let chroma_range = get_yuv_range(BIT_DEPTH as u32, range);
181 let kr_kb = matrix.get_kr_kb();
182 let i_transform = search_inverse_transform(
183 PRECISION,
184 BIT_DEPTH as u32,
185 range,
186 matrix,
187 chroma_range,
188 kr_kb,
189 );
190 let cr_coef = i_transform.cr_coef;
191 let cb_coef = i_transform.cb_coef;
192 let y_coef = i_transform.y_coef;
193 let g_coef_1 = i_transform.g_coeff_1;
194 let g_coef_2 = i_transform.g_coeff_2;
195
196 let bias_y = chroma_range.bias_y as i32;
197 let bias_uv = chroma_range.bias_uv as i32;
198
199 let msb_shift = 16 - BIT_DEPTH as i32;
200 let width = image.width;
201
202 let process_halved_chroma_row = |y_src: &[u16], uv_src: &[u16], rgba: &mut [u8]| {
203 let processed =
204 row_handler.handle_row(y_src, uv_src, rgba, image.width, chroma_range, &i_transform);
205 if processed.cx != image.width as usize {
206 for ((rgba, y_src), uv_src) in rgba
207 .chunks_exact_mut(CN * 2)
208 .zip(y_src.chunks_exact(2))
209 .zip(uv_src.chunks_exact(2))
210 .skip(processed.cx / 2)
211 {
212 let y_vl0 = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[0], msb_shift) as i32;
213 let mut cb_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
214 uv_src[uv_order.get_u_position()],
215 msb_shift,
216 ) as i32;
217 let mut cr_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
218 uv_src[uv_order.get_v_position()],
219 msb_shift,
220 ) as i32;
221
222 let y_value0: i32 = (y_vl0 - bias_y) * y_coef;
223
224 cb_value -= bias_uv;
225 cr_value -= bias_uv;
226
227 let r_p0 = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value0 + cr_coef * cr_value);
228 let b_p0 = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value0 + cb_coef * cb_value);
229 let g_p0 = qrshr::<BACK_SHIFT, BIT_DEPTH>(
230 y_value0 - g_coef_1 * cr_value - g_coef_2 * cb_value,
231 );
232
233 let pixel0 = ar30_layout
234 .pack::<AR30_STORE>(r_p0, g_p0, b_p0)
235 .to_ne_bytes();
236 rgba[0] = pixel0[0];
237 rgba[1] = pixel0[1];
238 rgba[2] = pixel0[2];
239 rgba[3] = pixel0[3];
240
241 let y_vl1 = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[1], msb_shift) as i32;
242
243 let y_value1: i32 = (y_vl1 - bias_y) * y_coef;
244
245 let r_p1 = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value1 + cr_coef * cr_value);
246 let b_p1 = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value1 + cb_coef * cb_value);
247 let g_p1 = qrshr::<BACK_SHIFT, BIT_DEPTH>(
248 y_value1 - g_coef_1 * cr_value - g_coef_2 * cb_value,
249 );
250
251 let pixel1 = ar30_layout
252 .pack::<AR30_STORE>(r_p1, g_p1, b_p1)
253 .to_ne_bytes();
254 rgba[4] = pixel1[0];
255 rgba[5] = pixel1[1];
256 rgba[6] = pixel1[2];
257 rgba[7] = pixel1[3];
258 }
259
260 if width & 1 != 0 {
261 let rgba = rgba.chunks_exact_mut(CN * 2).into_remainder();
262 let rgba = &mut rgba[0..CN];
263 let uv_src = uv_src.chunks_exact(2).last().unwrap();
264 let y_src = y_src.chunks_exact(2).remainder();
265
266 let y_vl0 = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[0], msb_shift) as i32;
267 let y_value0: i32 = (y_vl0 - bias_y) * y_coef;
268 let mut cb_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
269 uv_src[uv_order.get_u_position()],
270 msb_shift,
271 ) as i32;
272 let mut cr_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
273 uv_src[uv_order.get_v_position()],
274 msb_shift,
275 ) as i32;
276
277 cb_value -= bias_uv;
278 cr_value -= bias_uv;
279
280 let r_p0 = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value0 + cr_coef * cr_value);
281 let b_p0 = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value0 + cb_coef * cb_value);
282 let g_p0 = qrshr::<BACK_SHIFT, BIT_DEPTH>(
283 y_value0 - g_coef_1 * cr_value - g_coef_2 * cb_value,
284 );
285
286 let pixel0 = ar30_layout
287 .pack::<AR30_STORE>(r_p0, g_p0, b_p0)
288 .to_ne_bytes();
289 rgba[0] = pixel0[0];
290 rgba[1] = pixel0[1];
291 rgba[2] = pixel0[2];
292 rgba[3] = pixel0[3];
293 }
294 }
295 };
296
297 let y_stride = image.y_stride;
298 let uv_stride = image.uv_stride;
299 let y_plane = image.y_plane;
300 let uv_plane = image.uv_plane;
301
302 if chroma_subsampling == YuvChromaSubsampling::Yuv444 {
303 let iter;
304 #[cfg(feature = "rayon")]
305 {
306 iter = y_plane
307 .par_chunks_exact(y_stride as usize)
308 .zip(uv_plane.par_chunks_exact(uv_stride as usize))
309 .zip(ar30.par_chunks_exact_mut(ar30_stride as usize));
310 }
311 #[cfg(not(feature = "rayon"))]
312 {
313 iter = y_plane
314 .chunks_exact(y_stride as usize)
315 .zip(uv_plane.chunks_exact(uv_stride as usize))
316 .zip(ar30.chunks_exact_mut(ar30_stride as usize));
317 }
318 iter.for_each(|((y_src, uv_src), rgba)| {
319 let y_src = &y_src[0..image.width as usize];
320 let processed = row_handler.handle_row(
321 y_src,
322 uv_src,
323 rgba,
324 image.width,
325 chroma_range,
326 &i_transform,
327 );
328 if processed.cx != image.width as usize {
329 for ((rgba, &y_src), uv_src) in rgba
330 .chunks_exact_mut(CN)
331 .zip(y_src.iter())
332 .zip(uv_src.chunks_exact(2))
333 .skip(processed.cx)
334 {
335 let y_vl = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src, msb_shift) as i32;
336 let mut cb_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
337 uv_src[uv_order.get_u_position()],
338 msb_shift,
339 ) as i32;
340 let mut cr_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
341 uv_src[uv_order.get_v_position()],
342 msb_shift,
343 ) as i32;
344
345 let y_value: i32 = (y_vl - bias_y) * y_coef;
346
347 cb_value -= bias_uv;
348 cr_value -= bias_uv;
349
350 let r_p = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value + cr_coef * cr_value);
351 let b_p = qrshr::<BACK_SHIFT, BIT_DEPTH>(y_value + cb_coef * cb_value);
352 let g_p = qrshr::<BACK_SHIFT, BIT_DEPTH>(
353 y_value - g_coef_1 * cr_value - g_coef_2 * cb_value,
354 );
355
356 let pixel0 = ar30_layout.pack::<AR30_STORE>(r_p, g_p, b_p).to_ne_bytes();
357 rgba[0] = pixel0[0];
358 rgba[1] = pixel0[1];
359 rgba[2] = pixel0[2];
360 rgba[3] = pixel0[3];
361 }
362 }
363 });
364 } else if chroma_subsampling == YuvChromaSubsampling::Yuv422 {
365 let iter;
366 #[cfg(feature = "rayon")]
367 {
368 iter = y_plane
369 .par_chunks_exact(y_stride as usize)
370 .zip(uv_plane.par_chunks_exact(uv_stride as usize))
371 .zip(ar30.par_chunks_exact_mut(ar30_stride as usize));
372 }
373 #[cfg(not(feature = "rayon"))]
374 {
375 iter = y_plane
376 .chunks_exact(y_stride as usize)
377 .zip(uv_plane.chunks_exact(uv_stride as usize))
378 .zip(ar30.chunks_exact_mut(ar30_stride as usize));
379 }
380 iter.for_each(|((y_src, uv_src), rgba)| {
381 process_halved_chroma_row(
382 &y_src[0..image.width as usize],
383 &uv_src[0..(image.width as usize).div_ceil(2) * 2],
384 &mut rgba[0..image.width as usize * CN],
385 );
386 });
387 } else if chroma_subsampling == YuvChromaSubsampling::Yuv420 {
388 let iter;
389 #[cfg(feature = "rayon")]
390 {
391 iter = y_plane
392 .par_chunks_exact(y_stride as usize * 2)
393 .zip(uv_plane.par_chunks_exact(uv_stride as usize))
394 .zip(ar30.par_chunks_exact_mut(ar30_stride as usize * 2));
395 }
396 #[cfg(not(feature = "rayon"))]
397 {
398 iter = y_plane
399 .chunks_exact(y_stride as usize * 2)
400 .zip(uv_plane.chunks_exact(uv_stride as usize))
401 .zip(ar30.chunks_exact_mut(ar30_stride as usize * 2));
402 }
403 iter.for_each(|((y_src, uv_src), rgba)| {
404 for (y_src, rgba) in y_src
405 .chunks_exact(y_stride as usize)
406 .zip(rgba.chunks_exact_mut(ar30_stride as usize))
407 {
408 process_halved_chroma_row(
409 &y_src[0..image.width as usize],
410 &uv_src[0..(image.width as usize).div_ceil(2) * 2],
411 &mut rgba[0..image.width as usize * CN],
412 );
413 }
414 });
415 if image.height & 1 != 0 {
416 let y_src = y_plane.chunks_exact(y_stride as usize * 2).remainder();
417 let uv_src = uv_plane.chunks_exact(uv_stride as usize).last().unwrap();
418 let rgba = ar30
419 .chunks_exact_mut(ar30_stride as usize * 2)
420 .into_remainder();
421 process_halved_chroma_row(
422 &y_src[0..image.width as usize],
423 &uv_src[0..(image.width as usize).div_ceil(2) * 2],
424 &mut rgba[0..image.width as usize * CN],
425 );
426 }
427 } else {
428 unreachable!();
429 }
430
431 Ok(())
432}
433
434#[inline]
435fn yuv_nv_p10_to_image_impl<
436 const AR30_LAYOUT: usize,
437 const NV_ORDER: u8,
438 const SAMPLING: u8,
439 const ENDIANNESS: u8,
440 const BYTES_POSITION: u8,
441 const BIT_DEPTH: usize,
442 const BACK_SHIFT: i32,
443>(
444 image: &YuvBiPlanarImage<u16>,
445 bgra: &mut [u8],
446 bgra_stride: u32,
447 order: Rgb30ByteOrder,
448 range: YuvRange,
449 matrix: YuvStandardMatrix,
450) -> Result<(), YuvError> {
451 match order {
452 Rgb30ByteOrder::Host => yuv_nv_p10_to_image_impl_d::<
453 AR30_LAYOUT,
454 { Rgb30ByteOrder::Host as usize },
455 NV_ORDER,
456 SAMPLING,
457 ENDIANNESS,
458 BYTES_POSITION,
459 14,
460 BACK_SHIFT,
461 BIT_DEPTH,
462 >(
463 image,
464 bgra,
465 bgra_stride,
466 range,
467 matrix,
468 RowHandlerBalanced::<
469 AR30_LAYOUT,
470 { Rgb30ByteOrder::Host as usize },
471 NV_ORDER,
472 SAMPLING,
473 ENDIANNESS,
474 BYTES_POSITION,
475 14,
476 BIT_DEPTH,
477 >::default(),
478 ),
479 Rgb30ByteOrder::Network => yuv_nv_p10_to_image_impl_d::<
480 AR30_LAYOUT,
481 { Rgb30ByteOrder::Network as usize },
482 NV_ORDER,
483 SAMPLING,
484 ENDIANNESS,
485 BYTES_POSITION,
486 14,
487 BACK_SHIFT,
488 BIT_DEPTH,
489 >(
490 image,
491 bgra,
492 bgra_stride,
493 range,
494 matrix,
495 RowHandlerBalanced::<
496 AR30_LAYOUT,
497 { Rgb30ByteOrder::Network as usize },
498 NV_ORDER,
499 SAMPLING,
500 ENDIANNESS,
501 BYTES_POSITION,
502 14,
503 BIT_DEPTH,
504 >::default(),
505 ),
506 }
507}
508
// Generates one public conversion function named `$method`.
//
// `$name` and `$ar_name` are used only in the generated documentation. `$px_fmt`,
// `$chroma_subsampling`, `$bit_depth` and `$back_shift` become const arguments of
// `yuv_nv_p10_to_image_impl`. The chroma order is fixed to `YuvNVOrder::UV`, the
// endianness to little endian and the packing to most significant bytes.
//
// The documentation text must stay at column 0: indenting it by four or more
// spaces would make rustdoc render it as a code block.
macro_rules! define_cnv {
    ($method: ident, $name: expr, $ar_name:expr, $px_fmt: expr, $chroma_subsampling: expr, $bit_depth: expr, $back_shift: expr) => {
        #[doc = concat!("
Converts ", $name, " to ", $ar_name," format.
This function takes ", $name, " data with ", stringify!($bit_depth),"-bit precision
and converts it to ", $ar_name," format.

# Arguments

* `bi_planar_image` - Source Bi-Planar ", stringify!($bit_depth),"-bit image.
* `dst` - A mutable slice to store the converted ", $ar_name, " data.
* `dst_stride` - The stride for the ", $ar_name, " image data.
* `byte_order` - see [Rgb30ByteOrder] for more info.
* `range` - range of YUV, see [YuvRange] for more info.
* `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).

# Errors

Returns a [YuvError] if validation of the source planes or of the destination ", $ar_name," buffer
against the image width, height, and strides fails.")]
        pub fn $method(
            bi_planar_image: &YuvBiPlanarImage<u16>,
            dst: &mut [u8],
            dst_stride: u32,
            byte_order: Rgb30ByteOrder,
            range: YuvRange,
            matrix: YuvStandardMatrix,
        ) -> Result<(), YuvError> {
            yuv_nv_p10_to_image_impl::<{ $px_fmt as usize },
                { YuvNVOrder::UV as u8 },
                { $chroma_subsampling as u8 },
                { YuvEndianness::LittleEndian as u8 },
                { YuvBytesPacking::MostSignificantBytes as u8 },
                $bit_depth, $back_shift>(
                bi_planar_image,
                dst,
                dst_stride,
                byte_order,
                range,
                matrix,
            )
        }
    };
}
553
// Public entry points generated by `define_cnv!`. The last two arguments of every
// invocation are the source bit depth and the back shift handed to the converter.

// 10-bit sources: the back shift of 14 equals the fixed-point precision (14) that
// `yuv_nv_p10_to_image_impl` hard-codes.
define_cnv!(
    p010_to_ar30,
    "P010",
    "AR30",
    Rgb30::Ar30,
    YuvChromaSubsampling::Yuv420,
    10,
    14
);
define_cnv!(
    p010_to_ra30,
    "P010",
    "RA30",
    Rgb30::Ra30,
    YuvChromaSubsampling::Yuv420,
    10,
    14
);
define_cnv!(
    p210_to_ar30,
    "P210",
    "AR30",
    Rgb30::Ar30,
    YuvChromaSubsampling::Yuv422,
    10,
    14
);
define_cnv!(
    p210_to_ra30,
    "P210",
    "RA30",
    Rgb30::Ra30,
    YuvChromaSubsampling::Yuv422,
    10,
    14
);

// 12-bit sources: the back shift is 16, i.e. the precision of 14 plus 2.
// NOTE(review): presumably the two extra bits scale 12-bit results down to the
// 10-bit channels of a 30-bit pixel — confirm against `qrshr`.
define_cnv!(
    p012_to_ar30,
    "P012",
    "AR30",
    Rgb30::Ar30,
    YuvChromaSubsampling::Yuv420,
    12,
    16
);
define_cnv!(
    p012_to_ra30,
    "P012",
    "RA30",
    Rgb30::Ra30,
    YuvChromaSubsampling::Yuv420,
    12,
    16
);
define_cnv!(
    p212_to_ar30,
    "P212",
    "AR30",
    Rgb30::Ar30,
    YuvChromaSubsampling::Yuv422,
    12,
    16
);
define_cnv!(
    p212_to_ra30,
    "P212",
    "RA30",
    Rgb30::Ra30,
    YuvChromaSubsampling::Yuv422,
    12,
    16
);