1use crate::internals::ProcessedOffset;
30#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
31use crate::neon::neon_yuv_nv_p16_to_rgba_row;
32use crate::numerics::{qrshr_n, to_ne};
33use crate::yuv_error::check_rgba_destination;
34use crate::yuv_support::*;
35use crate::{YuvBiPlanarImage, YuvError};
36#[cfg(feature = "rayon")]
37use rayon::iter::{IndexedParallelIterator, ParallelIterator};
38#[cfg(feature = "rayon")]
39use rayon::prelude::{ParallelSlice, ParallelSliceMut};
40
41fn yuv_nv_p16_to_image_impl<
42 const DESTINATION_CHANNELS: u8,
43 const NV_ORDER: u8,
44 const SAMPLING: u8,
45 const ENDIANNESS: u8,
46 const BYTES_POSITION: u8,
47 const BIT_DEPTH: usize,
48>(
49 image: &YuvBiPlanarImage<u16>,
50 bgra: &mut [u16],
51 bgra_stride: u32,
52 range: YuvRange,
53 matrix: YuvStandardMatrix,
54) -> Result<(), YuvError> {
55 let dst_chans: YuvSourceChannels = DESTINATION_CHANNELS.into();
56 let channels = dst_chans.get_channels_count();
57 let uv_order: YuvNVOrder = NV_ORDER.into();
58 let chroma_subsampling: YuvChromaSubsampling = SAMPLING.into();
59 let chroma_range = get_yuv_range(BIT_DEPTH as u32, range);
60 let kr_kb = matrix.get_kr_kb();
61 let max_range = ((1u32 << (BIT_DEPTH as u32)) - 1u32) as i32;
62
63 image.check_constraints(chroma_subsampling)?;
64 check_rgba_destination(bgra, bgra_stride, image.width, image.height, channels)?;
65
66 const PRECISION: i32 = 13;
67 let i_transform = search_inverse_transform(
68 PRECISION,
69 BIT_DEPTH as u32,
70 range,
71 matrix,
72 chroma_range,
73 kr_kb,
74 );
75 let cr_coef = i_transform.cr_coef;
76 let cb_coef = i_transform.cb_coef;
77 let y_coef = i_transform.y_coef;
78 let g_coef_1 = i_transform.g_coeff_1;
79 let g_coef_2 = i_transform.g_coeff_2;
80
81 let bias_y = chroma_range.bias_y as i32;
82 let bias_uv = chroma_range.bias_uv as i32;
83
84 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
85 let mut _use_sse = std::arch::is_x86_feature_detected!("sse4.1");
86 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
87 let is_rdm_available = std::arch::is_aarch64_feature_detected!("rdm");
88 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
89 let neon_wide_row_handler = if is_rdm_available && BIT_DEPTH <= 12 {
90 #[cfg(feature = "rdm")]
91 {
92 use crate::neon::neon_yuv_nv_p16_to_rgba_row_rdm;
93 neon_yuv_nv_p16_to_rgba_row_rdm::<
94 DESTINATION_CHANNELS,
95 NV_ORDER,
96 SAMPLING,
97 ENDIANNESS,
98 BYTES_POSITION,
99 BIT_DEPTH,
100 PRECISION,
101 >
102 }
103 #[cfg(not(feature = "rdm"))]
104 {
105 neon_yuv_nv_p16_to_rgba_row::<
106 DESTINATION_CHANNELS,
107 NV_ORDER,
108 SAMPLING,
109 ENDIANNESS,
110 BYTES_POSITION,
111 BIT_DEPTH,
112 PRECISION,
113 >
114 }
115 } else {
116 neon_yuv_nv_p16_to_rgba_row::<
117 DESTINATION_CHANNELS,
118 NV_ORDER,
119 SAMPLING,
120 ENDIANNESS,
121 BYTES_POSITION,
122 BIT_DEPTH,
123 PRECISION,
124 >
125 };
126
127 let process_wide_row = |_rgba: &mut [u16], _y_src: &[u16], _uv_src: &[u16]| {
128 let mut _offset = ProcessedOffset { cx: 0, ux: 0 };
129 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
130 {
131 #[cfg(feature = "sse")]
132 if _use_sse {
133 use crate::sse::sse_yuv_nv_p16_to_rgba_row;
134 unsafe {
135 let processed = sse_yuv_nv_p16_to_rgba_row::<
136 DESTINATION_CHANNELS,
137 NV_ORDER,
138 SAMPLING,
139 ENDIANNESS,
140 BYTES_POSITION,
141 BIT_DEPTH,
142 PRECISION,
143 >(
144 _y_src,
145 _uv_src,
146 _rgba,
147 image.width,
148 &chroma_range,
149 &i_transform,
150 _offset.cx,
151 _offset.ux,
152 );
153 _offset = processed;
154 }
155 }
156 }
157
158 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
159 {
160 unsafe {
161 let processed = neon_wide_row_handler(
162 _y_src,
163 _uv_src,
164 _rgba,
165 image.width,
166 &chroma_range,
167 &i_transform,
168 0,
169 0,
170 );
171 _offset = processed;
172 }
173 }
174 _offset
175 };
176
177 let msb_shift = (16 - BIT_DEPTH) as i32;
178 let width = image.width;
179
180 let process_halved_chroma_row = |y_src: &[u16], uv_src: &[u16], rgba: &mut [u16]| {
181 let processed = process_wide_row(rgba, y_src, uv_src);
182
183 for ((rgba, y_src), uv_src) in rgba
184 .chunks_exact_mut(channels * 2)
185 .zip(y_src.chunks_exact(2))
186 .zip(uv_src.chunks_exact(2))
187 .skip(processed.cx / 2)
188 {
189 let y_vl0 = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[0], msb_shift) as i32;
190 let mut cb_value =
191 to_ne::<ENDIANNESS, BYTES_POSITION>(uv_src[uv_order.get_u_position()], msb_shift)
192 as i32;
193 let mut cr_value =
194 to_ne::<ENDIANNESS, BYTES_POSITION>(uv_src[uv_order.get_v_position()], msb_shift)
195 as i32;
196
197 let y_value0: i32 = (y_vl0 - bias_y) * y_coef;
198
199 cb_value -= bias_uv;
200 cr_value -= bias_uv;
201
202 let r_p0 = qrshr_n::<PRECISION>(y_value0 + cr_coef * cr_value, max_range);
203 let b_p0 = qrshr_n::<PRECISION>(y_value0 + cb_coef * cb_value, max_range);
204 let g_p0 = qrshr_n::<PRECISION>(
205 y_value0 - g_coef_1 * cr_value - g_coef_2 * cb_value,
206 max_range,
207 );
208
209 let rgba0 = &mut rgba[0..channels];
210
211 rgba0[dst_chans.get_b_channel_offset()] = b_p0 as u16;
212 rgba0[dst_chans.get_g_channel_offset()] = g_p0 as u16;
213 rgba0[dst_chans.get_r_channel_offset()] = r_p0 as u16;
214
215 if dst_chans.has_alpha() {
216 rgba0[dst_chans.get_a_channel_offset()] = max_range as u16;
217 }
218
219 let y_vl1 = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[1], msb_shift) as i32;
220
221 let y_value1: i32 = (y_vl1 - bias_y) * y_coef;
222
223 let r_p1 = qrshr_n::<PRECISION>(y_value1 + cr_coef * cr_value, max_range);
224 let b_p1 = qrshr_n::<PRECISION>(y_value1 + cb_coef * cb_value, max_range);
225 let g_p1 = qrshr_n::<PRECISION>(
226 y_value1 - g_coef_1 * cr_value - g_coef_2 * cb_value,
227 max_range,
228 );
229
230 let rgba1 = &mut rgba[channels..channels * 2];
231
232 rgba1[dst_chans.get_b_channel_offset()] = b_p1 as u16;
233 rgba1[dst_chans.get_g_channel_offset()] = g_p1 as u16;
234 rgba1[dst_chans.get_r_channel_offset()] = r_p1 as u16;
235
236 if dst_chans.has_alpha() {
237 rgba1[dst_chans.get_a_channel_offset()] = max_range as u16;
238 }
239 }
240
241 if width & 1 != 0 {
242 let rgba = rgba.chunks_exact_mut(channels * 2).into_remainder();
243 let rgba = &mut rgba[0..channels];
244 let uv_src = uv_src.chunks_exact(2).last().unwrap();
245 let y_src = y_src.chunks_exact(2).remainder();
246
247 let y_vl0 = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[0], msb_shift) as i32;
248 let y_value0: i32 = (y_vl0 - bias_y) * y_coef;
249 let mut cb_value =
250 to_ne::<ENDIANNESS, BYTES_POSITION>(uv_src[uv_order.get_u_position()], msb_shift)
251 as i32;
252 let mut cr_value =
253 to_ne::<ENDIANNESS, BYTES_POSITION>(uv_src[uv_order.get_v_position()], msb_shift)
254 as i32;
255
256 cb_value -= bias_uv;
257 cr_value -= bias_uv;
258
259 let r_p0 = qrshr_n::<PRECISION>(y_value0 + cr_coef * cr_value, max_range);
260 let b_p0 = qrshr_n::<PRECISION>(y_value0 + cb_coef * cb_value, max_range);
261 let g_p0 = qrshr_n::<PRECISION>(
262 y_value0 - g_coef_1 * cr_value - g_coef_2 * cb_value,
263 max_range,
264 );
265
266 rgba[dst_chans.get_b_channel_offset()] = b_p0 as u16;
267 rgba[dst_chans.get_g_channel_offset()] = g_p0 as u16;
268 rgba[dst_chans.get_r_channel_offset()] = r_p0 as u16;
269
270 if dst_chans.has_alpha() {
271 rgba[dst_chans.get_a_channel_offset()] = max_range as u16;
272 }
273 }
274 };
275
276 let y_stride = image.y_stride;
277 let uv_stride = image.uv_stride;
278 let y_plane = image.y_plane;
279 let uv_plane = image.uv_plane;
280
281 if chroma_subsampling == YuvChromaSubsampling::Yuv444 {
282 let iter;
283 #[cfg(feature = "rayon")]
284 {
285 iter = y_plane
286 .par_chunks_exact(y_stride as usize)
287 .zip(uv_plane.par_chunks_exact(uv_stride as usize))
288 .zip(bgra.par_chunks_exact_mut(bgra_stride as usize));
289 }
290 #[cfg(not(feature = "rayon"))]
291 {
292 iter = y_plane
293 .chunks_exact(y_stride as usize)
294 .zip(uv_plane.chunks_exact(uv_stride as usize))
295 .zip(bgra.chunks_exact_mut(bgra_stride as usize));
296 }
297 iter.for_each(|((y_src, uv_src), rgba)| {
298 let y_src = &y_src[0..image.width as usize];
299 let processed = process_wide_row(rgba, y_src, uv_src);
300
301 for ((rgba, &y_src), uv_src) in rgba
302 .chunks_exact_mut(channels)
303 .zip(y_src.iter())
304 .zip(uv_src.chunks_exact(2))
305 .skip(processed.cx)
306 {
307 let y_vl = to_ne::<ENDIANNESS, BYTES_POSITION>(y_src, msb_shift) as i32;
308 let mut cb_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
309 uv_src[uv_order.get_u_position()],
310 msb_shift,
311 ) as i32;
312 let mut cr_value = to_ne::<ENDIANNESS, BYTES_POSITION>(
313 uv_src[uv_order.get_v_position()],
314 msb_shift,
315 ) as i32;
316
317 let y_value: i32 = (y_vl - bias_y) * y_coef;
318
319 cb_value -= bias_uv;
320 cr_value -= bias_uv;
321
322 let r_p16 = qrshr_n::<PRECISION>(y_value + cr_coef * cr_value, max_range);
323 let b_p16 = qrshr_n::<PRECISION>(y_value + cb_coef * cb_value, max_range);
324 let g_p16 = qrshr_n::<PRECISION>(
325 y_value - g_coef_1 * cr_value - g_coef_2 * cb_value,
326 max_range,
327 );
328
329 let rgba0 = &mut rgba[0..channels];
330
331 rgba0[dst_chans.get_b_channel_offset()] = b_p16 as u16;
332 rgba0[dst_chans.get_g_channel_offset()] = g_p16 as u16;
333 rgba0[dst_chans.get_r_channel_offset()] = r_p16 as u16;
334
335 if dst_chans.has_alpha() {
336 rgba0[dst_chans.get_a_channel_offset()] = max_range as u16;
337 }
338 }
339 });
340 } else if chroma_subsampling == YuvChromaSubsampling::Yuv422 {
341 let iter;
342 #[cfg(feature = "rayon")]
343 {
344 iter = y_plane
345 .par_chunks_exact(y_stride as usize)
346 .zip(uv_plane.par_chunks_exact(uv_stride as usize))
347 .zip(bgra.par_chunks_exact_mut(bgra_stride as usize));
348 }
349 #[cfg(not(feature = "rayon"))]
350 {
351 iter = y_plane
352 .chunks_exact(y_stride as usize)
353 .zip(uv_plane.chunks_exact(uv_stride as usize))
354 .zip(bgra.chunks_exact_mut(bgra_stride as usize));
355 }
356 iter.for_each(|((y_src, uv_src), rgba)| {
357 process_halved_chroma_row(
358 &y_src[0..image.width as usize],
359 &uv_src[0..(image.width as usize).div_ceil(2) * 2],
360 &mut rgba[0..image.width as usize * channels],
361 );
362 });
363 } else if chroma_subsampling == YuvChromaSubsampling::Yuv420 {
364 let iter;
365 #[cfg(feature = "rayon")]
366 {
367 iter = y_plane
368 .par_chunks_exact(y_stride as usize * 2)
369 .zip(uv_plane.par_chunks_exact(uv_stride as usize))
370 .zip(bgra.par_chunks_exact_mut(bgra_stride as usize * 2));
371 }
372 #[cfg(not(feature = "rayon"))]
373 {
374 iter = y_plane
375 .chunks_exact(y_stride as usize * 2)
376 .zip(uv_plane.chunks_exact(uv_stride as usize))
377 .zip(bgra.chunks_exact_mut(bgra_stride as usize * 2));
378 }
379 iter.for_each(|((y_src, uv_src), rgba)| {
380 for (y_src, rgba) in y_src
381 .chunks_exact(y_stride as usize)
382 .zip(rgba.chunks_exact_mut(bgra_stride as usize))
383 {
384 process_halved_chroma_row(
385 &y_src[0..image.width as usize],
386 &uv_src[0..(image.width as usize).div_ceil(2) * 2],
387 &mut rgba[0..image.width as usize * channels],
388 );
389 }
390 });
391 if image.height & 1 != 0 {
392 let y_src = y_plane.chunks_exact(y_stride as usize * 2).remainder();
393 let uv_src = uv_plane.chunks_exact(uv_stride as usize).last().unwrap();
394 let rgba = bgra
395 .chunks_exact_mut(bgra_stride as usize * 2)
396 .into_remainder();
397 process_halved_chroma_row(
398 &y_src[0..image.width as usize],
399 &uv_src[0..(image.width as usize).div_ceil(2) * 2],
400 &mut rgba[0..image.width as usize * channels],
401 );
402 }
403 } else {
404 unreachable!();
405 }
406 Ok(())
407}
408
/// Generates a public conversion function from a 16-bit bi-planar YUV image to an
/// interleaved `u16` pixel buffer, together with its rustdoc.
///
/// * `$method` - name of the generated function.
/// * `$px_fmt` - destination channel layout (cast to `u8` for the const generic).
/// * `$subsampling` - chroma subsampling (cast to `u8` for the const generic).
/// * `$yuv_name`, `$px_name` - format names used only in the generated documentation.
/// * `$bit_precision` - bit depth, used both as a const generic and in the documentation.
///
/// Every generated function fixes the chroma order to `YuvNVOrder::UV`, the endianness to
/// `YuvEndianness::LittleEndian` and the packing to `YuvBytesPacking::MostSignificantBytes`.
macro_rules! d_cnv {
    ($method: ident, $px_fmt: expr, $subsampling: expr, $yuv_name: expr, $px_name: expr, $bit_precision: expr) => {
        #[doc = concat!("Convert ", $yuv_name," format to ", $px_name, stringify!($bit_precision)," format.

This function takes ", $yuv_name," data with ", stringify!($bit_precision),"-bit precision
and converts it to ", $px_name, stringify!($bit_precision)," format with ", stringify!($bit_precision)," bit-depth precision.

# Arguments

* `bi_planar_image` - Source ", stringify!($bit_precision)," bit-depth ", $yuv_name," image.
* `rgba` - A mutable slice to store the converted ", $px_name," ", stringify!($bit_precision)," bit-depth data.
* `rgba_stride` - The stride (components per row) for the ", $px_name," image data.
* `range` - range of YUV, see [YuvRange] for more info.
* `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).

# Errors

Returns a [YuvError] if the source planes or the destination ", $px_name," buffer are rejected
by the validation performed against the image width, height, and strides.")]
        pub fn $method(
            bi_planar_image: &YuvBiPlanarImage<u16>,
            rgba: &mut [u16],
            rgba_stride: u32,
            range: YuvRange,
            matrix: YuvStandardMatrix,
        ) -> Result<(), YuvError> {
            let dispatcher = yuv_nv_p16_to_image_impl::<
                { $px_fmt as u8 },
                { YuvNVOrder::UV as u8 },
                { $subsampling as u8 },
                { YuvEndianness::LittleEndian as u8 },
                { YuvBytesPacking::MostSignificantBytes as u8 },
                $bit_precision,
            >;
            dispatcher(bi_planar_image, rgba, rgba_stride, range, matrix)
        }
    };
}
447
448d_cnv!(
449 p010_to_rgba10,
450 YuvSourceChannels::Rgba,
451 YuvChromaSubsampling::Yuv420,
452 "P010",
453 "RGBA",
454 10
455);
456d_cnv!(
457 p010_to_rgb10,
458 YuvSourceChannels::Rgb,
459 YuvChromaSubsampling::Yuv420,
460 "P010",
461 "RGB",
462 10
463);
464d_cnv!(
465 p210_to_rgba10,
466 YuvSourceChannels::Rgba,
467 YuvChromaSubsampling::Yuv422,
468 "P210",
469 "RGBA",
470 10
471);
472d_cnv!(
473 p210_to_rgb10,
474 YuvSourceChannels::Rgb,
475 YuvChromaSubsampling::Yuv422,
476 "P210",
477 "RGB",
478 10
479);
480d_cnv!(
481 p410_to_rgba10,
482 YuvSourceChannels::Rgba,
483 YuvChromaSubsampling::Yuv444,
484 "P410",
485 "RGBA",
486 10
487);
488d_cnv!(
489 p410_to_rgb10,
490 YuvSourceChannels::Rgb,
491 YuvChromaSubsampling::Yuv444,
492 "P410",
493 "RGB",
494 10
495);
496
497d_cnv!(
498 p012_to_rgba12,
499 YuvSourceChannels::Rgba,
500 YuvChromaSubsampling::Yuv420,
501 "P012",
502 "RGBA",
503 12
504);
505d_cnv!(
506 p012_to_rgb12,
507 YuvSourceChannels::Rgb,
508 YuvChromaSubsampling::Yuv420,
509 "P012",
510 "RGB",
511 12
512);
513d_cnv!(
514 p212_to_rgba12,
515 YuvSourceChannels::Rgba,
516 YuvChromaSubsampling::Yuv422,
517 "P212",
518 "RGBA",
519 12
520);
521d_cnv!(
522 p212_to_rgb12,
523 YuvSourceChannels::Rgb,
524 YuvChromaSubsampling::Yuv422,
525 "P212",
526 "RGB",
527 12
528);
529d_cnv!(
530 p412_to_rgba12,
531 YuvSourceChannels::Rgba,
532 YuvChromaSubsampling::Yuv444,
533 "P412",
534 "RGBA",
535 12
536);
537d_cnv!(
538 p412_to_rgb12,
539 YuvSourceChannels::Rgb,
540 YuvChromaSubsampling::Yuv444,
541 "P412",
542 "RGB",
543 12
544);