1#[allow(unused_imports)]
30use crate::internals::ProcessedOffset;
31#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
32use crate::neon::neon_yuv_p16_to_rgba_alpha_row;
33use crate::numerics::to_ne;
34use crate::yuv_error::check_rgba_destination;
35use crate::yuv_support::{
36 get_yuv_range, search_inverse_transform, YuvBytesPacking, YuvChromaSubsampling, YuvEndianness,
37 YuvRange, YuvSourceChannels, YuvStandardMatrix,
38};
39use crate::{YuvError, YuvPlanarImageWithAlpha};
40#[cfg(feature = "rayon")]
41use rayon::iter::{IndexedParallelIterator, ParallelIterator};
42#[cfg(feature = "rayon")]
43use rayon::prelude::{ParallelSlice, ParallelSliceMut};
44
45fn yuv_p16_to_image_alpha_ant<
46 const DESTINATION_CHANNELS: u8,
47 const SAMPLING: u8,
48 const ENDIANNESS: u8,
49 const BYTES_POSITION: u8,
50 const BIT_DEPTH: usize,
51>(
52 image: &YuvPlanarImageWithAlpha<u16>,
53 rgba: &mut [u8],
54 rgba_stride: u32,
55 range: YuvRange,
56 matrix: YuvStandardMatrix,
57) -> Result<(), YuvError> {
58 let dst_chans: YuvSourceChannels = DESTINATION_CHANNELS.into();
59 let channels = dst_chans.get_channels_count();
60
61 assert!(
62 dst_chans != YuvSourceChannels::Rgb && dst_chans != YuvSourceChannels::Bgr,
63 "Cannot call YUV p16 to Rgb8 with alpha without real alpha"
64 );
65 assert!(
66 BIT_DEPTH == 10 || BIT_DEPTH == 12,
67 "YUV16 -> RGB8 implemented only 10 and 12 bit depth"
68 );
69
70 let chroma_subsampling: YuvChromaSubsampling = SAMPLING.into();
71
72 image.check_constraints(chroma_subsampling)?;
73 check_rgba_destination(rgba, rgba_stride, image.width, image.height, channels)?;
74
75 let chroma_range = get_yuv_range(BIT_DEPTH as u32, range);
76 let kr_kb = matrix.get_kr_kb();
77 const PRECISION: i32 = 13;
78 let i_transform = search_inverse_transform(
79 PRECISION,
80 BIT_DEPTH as u32,
81 range,
82 matrix,
83 chroma_range,
84 kr_kb,
85 );
86 let cr_coef = i_transform.cr_coef;
87 let cb_coef = i_transform.cb_coef;
88 let y_coef = i_transform.y_coef;
89 let g_coef_1 = i_transform.g_coeff_1;
90 let g_coef_2 = i_transform.g_coeff_2;
91
92 let bias_y = chroma_range.bias_y as i32;
93 let bias_uv = chroma_range.bias_uv as i32;
94
95 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
96 let use_sse = std::arch::is_x86_feature_detected!("sse4.1");
97 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "avx"))]
98 let use_avx = std::arch::is_x86_feature_detected!("avx2");
99
100 let msb_shift = (16 - BIT_DEPTH) as i32;
101 let store_shift = BIT_DEPTH - 8;
102
103 #[inline(always)]
104 fn qrshr<const BIT_DEPTH: usize>(val: i32) -> i32 {
106 let total_shift = PRECISION + (BIT_DEPTH as i32 - 8);
107 let rounding: i32 = 1 << (total_shift - 1);
108 let max_value: i32 = (1 << BIT_DEPTH) - 1;
109 ((val + rounding) >> total_shift).min(max_value).max(0)
110 }
111
112 let process_wide_row = |_y_plane: &[u16],
113 _u_plane: &[u16],
114 _v_plane: &[u16],
115 _a_plane: &[u16],
116 _rgba: &mut [u8]| {
117 let mut _cx = 0usize;
118 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
119 {
120 let mut _v_offset = ProcessedOffset { cx: 0, ux: 0 };
121 {
122 #[cfg(feature = "avx")]
123 if use_avx {
124 use crate::avx2::avx_yuv_p16_to_rgba8_alpha_row;
125 unsafe {
126 let offset = avx_yuv_p16_to_rgba8_alpha_row::<
127 DESTINATION_CHANNELS,
128 SAMPLING,
129 ENDIANNESS,
130 BYTES_POSITION,
131 BIT_DEPTH,
132 PRECISION,
133 >(
134 _y_plane,
135 _u_plane,
136 _v_plane,
137 _a_plane,
138 _rgba,
139 image.width,
140 &chroma_range,
141 &i_transform,
142 _v_offset.cx,
143 _v_offset.ux,
144 );
145 _v_offset = offset;
146 }
147 }
148 #[cfg(feature = "sse")]
149 if use_sse {
150 use crate::sse::sse_yuv_p16_to_rgba8_alpha_row;
151 unsafe {
152 let offset = sse_yuv_p16_to_rgba8_alpha_row::<
153 DESTINATION_CHANNELS,
154 SAMPLING,
155 ENDIANNESS,
156 BYTES_POSITION,
157 BIT_DEPTH,
158 PRECISION,
159 >(
160 _y_plane,
161 _u_plane,
162 _v_plane,
163 _a_plane,
164 _rgba,
165 image.width,
166 &chroma_range,
167 &i_transform,
168 _v_offset.cx,
169 _v_offset.ux,
170 );
171 _v_offset = offset;
172 }
173 }
174 }
175 _cx = _v_offset.cx;
176 }
177 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
178 {
179 unsafe {
180 let offset = neon_yuv_p16_to_rgba_alpha_row::<
181 DESTINATION_CHANNELS,
182 SAMPLING,
183 ENDIANNESS,
184 BYTES_POSITION,
185 BIT_DEPTH,
186 PRECISION,
187 >(
188 _y_plane,
189 _u_plane,
190 _v_plane,
191 _a_plane,
192 _rgba,
193 image.width,
194 &chroma_range,
195 &i_transform,
196 0,
197 0,
198 );
199 _cx = offset.cx;
200 }
201 }
202 _cx
203 };
204
205 let process_halved_chroma_row = |y_plane: &[u16],
206 u_plane: &[u16],
207 v_plane: &[u16],
208 a_plane: &[u16],
209 rgba: &mut [u8]| {
210 let cx = process_wide_row(y_plane, u_plane, v_plane, a_plane, rgba);
211
212 for ((((rgba, y_src), &u_src), &v_src), a_src) in rgba
213 .chunks_exact_mut(channels * 2)
214 .zip(y_plane.chunks_exact(2))
215 .zip(u_plane.iter())
216 .zip(v_plane.iter())
217 .zip(a_plane.chunks_exact(2))
218 .skip(cx / 2)
219 {
220 let y_value0 =
221 (to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[0], msb_shift) as i32 - bias_y) * y_coef;
222 let cb_value = to_ne::<ENDIANNESS, BYTES_POSITION>(u_src, msb_shift) as i32 - bias_uv;
223 let cr_value = to_ne::<ENDIANNESS, BYTES_POSITION>(v_src, msb_shift) as i32 - bias_uv;
224
225 let r0 = qrshr::<BIT_DEPTH>(y_value0 + cr_coef * cr_value);
226 let b0 = qrshr::<BIT_DEPTH>(y_value0 + cb_coef * cb_value);
227 let g0 = qrshr::<BIT_DEPTH>(y_value0 - g_coef_1 * cr_value - g_coef_2 * cb_value);
228
229 let rgba0 = &mut rgba[0..channels];
230
231 rgba0[dst_chans.get_r_channel_offset()] = r0 as u8;
232 rgba0[dst_chans.get_g_channel_offset()] = g0 as u8;
233 rgba0[dst_chans.get_b_channel_offset()] = b0 as u8;
234 rgba0[dst_chans.get_a_channel_offset()] = (a_src[0] >> store_shift).min(255) as u8;
235
236 let y_value1 =
237 (to_ne::<ENDIANNESS, BYTES_POSITION>(y_src[1], msb_shift) as i32 - bias_y) * y_coef;
238
239 let r1 = qrshr::<BIT_DEPTH>(y_value1 + cr_coef * cr_value);
240 let b1 = qrshr::<BIT_DEPTH>(y_value1 + cb_coef * cb_value);
241 let g1 = qrshr::<BIT_DEPTH>(y_value1 - g_coef_1 * cr_value - g_coef_2 * cb_value);
242
243 let rgba1 = &mut rgba[channels..channels * 2];
244
245 rgba1[dst_chans.get_r_channel_offset()] = r1 as u8;
246 rgba1[dst_chans.get_g_channel_offset()] = g1 as u8;
247 rgba1[dst_chans.get_b_channel_offset()] = b1 as u8;
248 rgba1[dst_chans.get_a_channel_offset()] = (a_src[1] >> store_shift).min(255) as u8;
249 }
250
251 if image.width & 1 != 0 {
252 let y_value0 = (to_ne::<ENDIANNESS, BYTES_POSITION>(*y_plane.last().unwrap(), msb_shift)
253 as i32
254 - bias_y)
255 * y_coef;
256 let cb_value = to_ne::<ENDIANNESS, BYTES_POSITION>(*u_plane.last().unwrap(), msb_shift)
257 as i32
258 - bias_uv;
259 let cr_value = to_ne::<ENDIANNESS, BYTES_POSITION>(*v_plane.last().unwrap(), msb_shift)
260 as i32
261 - bias_uv;
262 let a_value = *a_plane.last().unwrap();
263 let rgba = rgba.chunks_exact_mut(channels).last().unwrap();
264 let rgba0 = &mut rgba[0..channels];
265
266 let r0 = qrshr::<BIT_DEPTH>(y_value0 + cr_coef * cr_value);
267 let b0 = qrshr::<BIT_DEPTH>(y_value0 + cb_coef * cb_value);
268 let g0 = qrshr::<BIT_DEPTH>(y_value0 - g_coef_1 * cr_value - g_coef_2 * cb_value);
269 rgba0[dst_chans.get_r_channel_offset()] = r0 as u8;
270 rgba0[dst_chans.get_g_channel_offset()] = g0 as u8;
271 rgba0[dst_chans.get_b_channel_offset()] = b0 as u8;
272 rgba0[dst_chans.get_a_channel_offset()] = (a_value >> store_shift).min(255) as u8;
273 }
274 };
275
276 if chroma_subsampling == YuvChromaSubsampling::Yuv444 {
277 let iter;
278 #[cfg(feature = "rayon")]
279 {
280 iter = rgba
281 .par_chunks_exact_mut(rgba_stride as usize)
282 .zip(image.y_plane.par_chunks_exact(image.y_stride as usize))
283 .zip(image.a_plane.par_chunks_exact(image.a_stride as usize))
284 .zip(image.u_plane.par_chunks_exact(image.u_stride as usize))
285 .zip(image.v_plane.par_chunks_exact(image.v_stride as usize));
286 }
287 #[cfg(not(feature = "rayon"))]
288 {
289 iter = rgba
290 .chunks_exact_mut(rgba_stride as usize)
291 .zip(image.y_plane.chunks_exact(image.y_stride as usize))
292 .zip(image.a_plane.chunks_exact(image.a_stride as usize))
293 .zip(image.u_plane.chunks_exact(image.u_stride as usize))
294 .zip(image.v_plane.chunks_exact(image.v_stride as usize));
295 }
296 iter.for_each(|((((rgba, y_plane), a_plane), u_plane), v_plane)| {
297 let y_plane = &y_plane[0..image.width as usize];
298 let cx = process_wide_row(y_plane, u_plane, v_plane, a_plane, rgba);
299
300 for ((((rgba, &y_src), &u_src), &v_src), &a_src) in rgba
301 .chunks_exact_mut(channels)
302 .zip(y_plane.iter())
303 .zip(u_plane.iter())
304 .zip(v_plane.iter())
305 .zip(a_plane.iter())
306 .skip(cx)
307 {
308 let y_value = (to_ne::<ENDIANNESS, BYTES_POSITION>(y_src, msb_shift) as i32
309 - bias_y)
310 * y_coef;
311 let cb_value =
312 to_ne::<ENDIANNESS, BYTES_POSITION>(u_src, msb_shift) as i32 - bias_uv;
313 let cr_value =
314 to_ne::<ENDIANNESS, BYTES_POSITION>(v_src, msb_shift) as i32 - bias_uv;
315
316 let r = qrshr::<BIT_DEPTH>(y_value + cr_coef * cr_value);
317 let b = qrshr::<BIT_DEPTH>(y_value + cb_coef * cb_value);
318 let g = qrshr::<BIT_DEPTH>(y_value - g_coef_1 * cr_value - g_coef_2 * cb_value);
319
320 rgba[dst_chans.get_r_channel_offset()] = r as u8;
321 rgba[dst_chans.get_g_channel_offset()] = g as u8;
322 rgba[dst_chans.get_b_channel_offset()] = b as u8;
323 rgba[dst_chans.get_a_channel_offset()] = (a_src >> store_shift).min(255) as u8;
324 }
325 });
326 } else if chroma_subsampling == YuvChromaSubsampling::Yuv422 {
327 let iter;
328 #[cfg(feature = "rayon")]
329 {
330 iter = rgba
331 .par_chunks_exact_mut(rgba_stride as usize)
332 .zip(image.y_plane.par_chunks_exact(image.y_stride as usize))
333 .zip(image.a_plane.par_chunks_exact(image.a_stride as usize))
334 .zip(image.u_plane.par_chunks_exact(image.u_stride as usize))
335 .zip(image.v_plane.par_chunks_exact(image.v_stride as usize));
336 }
337 #[cfg(not(feature = "rayon"))]
338 {
339 iter = rgba
340 .chunks_exact_mut(rgba_stride as usize)
341 .zip(image.y_plane.chunks_exact(image.y_stride as usize))
342 .zip(image.a_plane.chunks_exact(image.a_stride as usize))
343 .zip(image.u_plane.chunks_exact(image.u_stride as usize))
344 .zip(image.v_plane.chunks_exact(image.v_stride as usize));
345 }
346 iter.for_each(|((((rgba, y_plane), a_plane), u_plane), v_plane)| {
347 process_halved_chroma_row(y_plane, u_plane, v_plane, a_plane, rgba);
348 });
349 } else if chroma_subsampling == YuvChromaSubsampling::Yuv420 {
350 let iter;
351 #[cfg(feature = "rayon")]
352 {
353 iter = rgba
354 .par_chunks_exact_mut(rgba_stride as usize * 2)
355 .zip(image.y_plane.par_chunks_exact(image.y_stride as usize * 2))
356 .zip(image.a_plane.par_chunks_exact(image.a_stride as usize * 2))
357 .zip(image.u_plane.par_chunks_exact(image.u_stride as usize))
358 .zip(image.v_plane.par_chunks_exact(image.v_stride as usize));
359 }
360 #[cfg(not(feature = "rayon"))]
361 {
362 iter = rgba
363 .chunks_exact_mut(rgba_stride as usize * 2)
364 .zip(image.y_plane.chunks_exact(image.y_stride as usize * 2))
365 .zip(image.a_plane.chunks_exact(image.a_stride as usize * 2))
366 .zip(image.u_plane.chunks_exact(image.u_stride as usize))
367 .zip(image.v_plane.chunks_exact(image.v_stride as usize));
368 }
369 iter.for_each(|((((rgba, y_plane), a_plane), u_plane), v_plane)| {
370 for ((rgba, y_plane), a_plane) in rgba
371 .chunks_exact_mut(rgba_stride as usize)
372 .zip(y_plane.chunks_exact(image.y_stride as usize))
373 .zip(a_plane.chunks_exact(image.a_stride as usize))
374 {
375 process_halved_chroma_row(
376 &y_plane[0..image.width as usize],
377 &u_plane[0..(image.width as usize).div_ceil(2)],
378 &v_plane[0..(image.width as usize).div_ceil(2)],
379 &a_plane[0..image.width as usize],
380 &mut rgba[0..image.width as usize * channels],
381 );
382 }
383 });
384
385 if image.height & 1 != 0 {
386 let rgba = rgba.chunks_exact_mut(rgba_stride as usize).last().unwrap();
387 let u_plane = image
388 .u_plane
389 .chunks_exact(image.u_stride as usize)
390 .last()
391 .unwrap();
392 let v_plane = image
393 .v_plane
394 .chunks_exact(image.v_stride as usize)
395 .last()
396 .unwrap();
397 let a_plane = image
398 .a_plane
399 .chunks_exact(image.a_stride as usize)
400 .last()
401 .unwrap();
402 let y_plane = image
403 .y_plane
404 .chunks_exact(image.y_stride as usize)
405 .last()
406 .unwrap();
407 process_halved_chroma_row(
408 &y_plane[0..image.width as usize],
409 &u_plane[0..(image.width as usize).div_ceil(2)],
410 &v_plane[0..(image.width as usize).div_ceil(2)],
411 &a_plane[0..image.width as usize],
412 &mut rgba[0..image.width as usize * channels],
413 );
414 }
415 } else {
416 unreachable!();
417 }
418
419 Ok(())
420}
421
// Generates a public conversion function named `$method` that forwards to
// `yuv_p16_to_image_alpha_ant` with the destination layout (`$px_fmt`), chroma
// subsampling (`$sampling`), endianness (`$endian`) and bit depth (`$bit_depth`)
// fixed at compile time. `$sampling_written` and `$px_written` are only used to
// build the rustdoc text. `$px_written_small` is accepted for call-site
// compatibility; the generated function names its destination arguments
// `dst` / `dst_stride`.
macro_rules! d_cnv {
    ($method: ident, $px_fmt: expr, $sampling: expr, $endian: expr, $sampling_written: expr, $px_written: expr, $px_written_small: expr, $bit_depth: expr) => {
        #[doc = concat!("
Convert ",$sampling_written, " planar format with ", stringify!($bit_depth), " bit pixel format to ", $px_written," 8 bit-depth format with interleaved alpha.

This function takes ", $sampling_written, " planar data with ", stringify!($bit_depth), " bit precision together with its alpha plane,
and converts it to ", $px_written," format with 8 bit-depth precision per channel.

# Arguments

* `planar_image_with_alpha` - Source ",$sampling_written," planar image with alpha plane.
* `dst` - A mutable slice to store the converted ", $px_written," 8 bit-depth data.
* `dst_stride` - The stride (components per row) for ", $px_written," 8 bit-depth data.
* `range` - The YUV range (limited or full).
* `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).

# Errors

Returns a `YuvError` if validation of the source planes or of the destination ", $px_written," buffer
against the image width, height and strides fails.")]
        pub fn $method(
            planar_image_with_alpha: &YuvPlanarImageWithAlpha<u16>,
            dst: &mut [u8],
            dst_stride: u32,
            range: YuvRange,
            matrix: YuvStandardMatrix,
        ) -> Result<(), YuvError> {
            yuv_p16_to_image_alpha_ant::<{ $px_fmt as u8 },
                            { $sampling as u8 },
                            { $endian as u8 },
                            { YuvBytesPacking::LeastSignificantBytes as u8 }, $bit_depth>(
                planar_image_with_alpha, dst, dst_stride, range, matrix)
        }
    };
}
458
// ---- 10-bit sources -> RGBA ----
// Each invocation expands to one public function named by the first argument.
// Big-endian variants are only compiled with the `big_endian` feature.

// 4:2:0, little-endian.
d_cnv!(
    i010_alpha_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv420,
    YuvEndianness::LittleEndian,
    "I010A",
    "RGBA",
    "rgba",
    10
);
// 4:2:0, big-endian.
#[cfg(feature = "big_endian")]
d_cnv!(
    i010_be_alpha_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv420,
    YuvEndianness::BigEndian,
    "I010ABE",
    "RGBA",
    "rgba",
    10
);

// 4:2:2, little-endian.
d_cnv!(
    i210_alpha_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv422,
    YuvEndianness::LittleEndian,
    "I210A",
    "RGBA",
    "rgba",
    10
);
// 4:2:2, big-endian.
// NOTE(review): this name puts `be` after `alpha` (`i210_alpha_be_to_rgba`),
// whereas the sibling big-endian functions use `*_be_alpha_to_rgba`. It is
// public API, so it is left untouched here.
#[cfg(feature = "big_endian")]
d_cnv!(
    i210_alpha_be_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv422,
    YuvEndianness::BigEndian,
    "I210ABE",
    "RGBA",
    "rgba",
    10
);
// 4:4:4, little-endian.
d_cnv!(
    i410_alpha_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv444,
    YuvEndianness::LittleEndian,
    "I410A",
    "RGBA",
    "rgba",
    10
);
// 4:4:4, big-endian.
#[cfg(feature = "big_endian")]
d_cnv!(
    i410_be_alpha_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv444,
    YuvEndianness::BigEndian,
    "I410ABE",
    "RGBA",
    "rgba",
    10
);
523
// ---- 12-bit sources -> RGBA ----
// Each invocation expands to one public function named by the first argument.
// Big-endian variants are only compiled with the `big_endian` feature.

// 4:2:0, little-endian.
d_cnv!(
    i012_alpha_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv420,
    YuvEndianness::LittleEndian,
    "I012A",
    "RGBA",
    "rgba",
    12
);
// 4:2:0, big-endian.
#[cfg(feature = "big_endian")]
d_cnv!(
    i012_be_alpha_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv420,
    YuvEndianness::BigEndian,
    "I012ABE",
    "RGBA",
    "rgba",
    12
);
// 4:2:2, little-endian.
d_cnv!(
    i212_alpha_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv422,
    YuvEndianness::LittleEndian,
    "I212A",
    "RGBA",
    "rgba",
    12
);
// 4:2:2, big-endian.
#[cfg(feature = "big_endian")]
d_cnv!(
    i212_be_alpha_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv422,
    YuvEndianness::BigEndian,
    "I212ABE",
    "RGBA",
    "rgba",
    12
);
// 4:4:4, little-endian.
d_cnv!(
    i412_alpha_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv444,
    YuvEndianness::LittleEndian,
    "I412A",
    "RGBA",
    "rgba",
    12
);
// 4:4:4, big-endian.
#[cfg(feature = "big_endian")]
d_cnv!(
    i412_be_alpha_to_rgba,
    YuvSourceChannels::Rgba,
    YuvChromaSubsampling::Yuv444,
    YuvEndianness::BigEndian,
    "I412ABE",
    "RGBA",
    "rgba",
    12
);