1use crate::yuv_error::check_rgba_destination;
30use crate::yuv_support::{
31 get_yuv_range, search_inverse_transform, CbCrInverseTransform, YuvPacked444Format,
32 YuvSourceChannels,
33};
34use crate::{YuvError, YuvPackedImage, YuvRange, YuvStandardMatrix};
35#[cfg(feature = "rayon")]
36use rayon::iter::{IndexedParallelIterator, ParallelIterator};
37#[cfg(feature = "rayon")]
38use rayon::prelude::{ParallelSlice, ParallelSliceMut};
39
/// Scalar row kernel: converts packed 4-byte YUV+alpha pixels into RGB(A).
///
/// The macro expands inside a function body and relies on a `PRECISION`
/// const being in scope there (it is forwarded to `qrshr`).
///
/// Arguments:
/// * `$src` - source row, consumed four bytes per pixel.
/// * `$dst` - destination row, written `$cn.get_channels_count()` bytes per pixel.
/// * `$premultiply_alpha` - when true, R/G/B are scaled by the pixel alpha
///   through `div_by_255` before being stored.
/// * `$ts` - inverse transform coefficients (`y_coef`, `cr_coef`, `cb_coef`,
///   `g_coeff_1`, `g_coeff_2`).
/// * `$bias_y`, `$bias_uv` - offsets subtracted from luma and chroma samples.
/// * `$cn` - destination channel layout (offsets, channel count, alpha presence).
/// * `$packed` - source layout giving the byte position of Y, U, V and A.
///
/// The alpha test is done once per row, outside the pixel loops.
#[allow(unused, dead_code)]
macro_rules! cnv_exec {
    ($src: expr, $dst: expr, $premultiply_alpha: expr, $ts: expr, $bias_y: expr, $bias_uv: expr, $cn: expr, $packed: expr) => {
        use crate::numerics::*;
        if !$premultiply_alpha {
            // Straight alpha: colour channels are stored as converted.
            for (yuva_px, rgb_px) in $src
                .chunks_exact(4)
                .zip($dst.chunks_exact_mut($cn.get_channels_count()))
            {
                let luma = yuva_px[$packed.get_y_ps()] as i16 - $bias_y;
                let cb = (yuva_px[$packed.get_u_ps()] as i16 - $bias_uv) as i32;
                let cr = (yuva_px[$packed.get_v_ps()] as i16 - $bias_uv) as i32;
                let alpha = yuva_px[$packed.get_a_ps()];
                let scaled_luma = luma as i32 * $ts.y_coef as i32;

                // NOTE(review): `qrshr` lives in `crate::numerics`; presumably a
                // rounding shift by PRECISION narrowed to 8 bits - confirm there.
                let red = qrshr::<PRECISION, 8>(scaled_luma + $ts.cr_coef as i32 * cr);
                let green = qrshr::<PRECISION, 8>(
                    scaled_luma - $ts.g_coeff_1 as i32 * cr - $ts.g_coeff_2 as i32 * cb,
                );
                let blue = qrshr::<PRECISION, 8>(scaled_luma + $ts.cb_coef as i32 * cb);

                rgb_px[$cn.get_r_channel_offset()] = red as u8;
                rgb_px[$cn.get_g_channel_offset()] = green as u8;
                rgb_px[$cn.get_b_channel_offset()] = blue as u8;
                if $cn.has_alpha() {
                    rgb_px[$cn.get_a_channel_offset()] = alpha;
                }
            }
        } else {
            // Premultiplied alpha: every colour channel is scaled by alpha.
            for (yuva_px, rgb_px) in $src
                .chunks_exact(4)
                .zip($dst.chunks_exact_mut($cn.get_channels_count()))
            {
                let luma = yuva_px[$packed.get_y_ps()] as i16 - $bias_y;
                let cb = (yuva_px[$packed.get_u_ps()] as i16 - $bias_uv) as i32;
                let cr = (yuva_px[$packed.get_v_ps()] as i16 - $bias_uv) as i32;
                let alpha = yuva_px[$packed.get_a_ps()];
                let scaled_luma = luma as i32 * $ts.y_coef as i32;

                let red = qrshr::<PRECISION, 8>(scaled_luma + $ts.cr_coef as i32 * cr);
                let green = qrshr::<PRECISION, 8>(
                    scaled_luma - $ts.g_coeff_1 as i32 * cr - $ts.g_coeff_2 as i32 * cb,
                );
                let blue = qrshr::<PRECISION, 8>(scaled_luma + $ts.cb_coef as i32 * cb);

                let alpha_wide = alpha as u16;
                let red = div_by_255(red as u16 * alpha_wide);
                let green = div_by_255(green as u16 * alpha_wide);
                let blue = div_by_255(blue as u16 * alpha_wide);

                rgb_px[$cn.get_r_channel_offset()] = red as u8;
                rgb_px[$cn.get_g_channel_offset()] = green as u8;
                rgb_px[$cn.get_b_channel_offset()] = blue as u8;
                if $cn.has_alpha() {
                    rgb_px[$cn.get_a_channel_offset()] = alpha;
                }
            }
        }
    };
}
107
/// Function-pointer type shared by every per-row conversion kernel in this file.
///
/// Arguments, in order: source row, destination row, the `premultiply_alpha`
/// flag, the inverse transform coefficients, the luma bias, the chroma bias,
/// and a `usize` that the launcher fills with the image width in pixels.
///
/// The pointer is `unsafe` so that the `#[target_feature]` kernels (which are
/// `unsafe fn`) and the safe kernels can be stored in the same type.
type RowExecutor = unsafe fn(&[u8], &mut [u8], bool, CbCrInverseTransform<i16>, i16, i16, usize);
109
110#[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
111fn default_executor<const DST: u8, const PACKED: u8, const PRECISION: i32>(
112 src: &[u8],
113 dst: &mut [u8],
114 premultiply_alpha: bool,
115 ts: CbCrInverseTransform<i16>,
116 bias_y: i16,
117 bias_uv: i16,
118 _: usize,
119) {
120 let cn: YuvSourceChannels = DST.into();
121 let packed: YuvPacked444Format = PACKED.into();
122 cnv_exec!(src, dst, premultiply_alpha, ts, bias_y, bias_uv, cn, packed);
123}
124
/// NEON row kernel adapter, compiled only for aarch64 builds with NEON enabled.
///
/// Forwards the row to `crate::neon::neon_ayuv_to_rgba`, reordering the
/// arguments to that function's signature. `PRECISION` is accepted only so
/// that all executors share the same generic parameter list.
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
fn default_executor_neon<const DST: u8, const PACKED: u8, const PRECISION: i32>(
    src: &[u8],
    dst: &mut [u8],
    premultiply_alpha: bool,
    ts: CbCrInverseTransform<i16>,
    bias_y: i16,
    bias_uv: i16,
    width: usize,
) {
    // SAFETY: this function only exists when NEON is enabled at compile time.
    // NOTE(review): any slice-length requirements of the kernel are not
    // visible from this file - confirm against `crate::neon`.
    unsafe {
        crate::neon::neon_ayuv_to_rgba::<DST, PACKED>(
            src,
            dst,
            &ts,
            bias_y,
            bias_uv,
            width,
            premultiply_alpha,
        );
    }
}
140
/// NEON + RDM row kernel adapter, compiled only for aarch64 builds with NEON
/// and the crate's `rdm` feature.
///
/// Forwards the row to `crate::neon::neon_ayuv_to_rgba_rdm`. `PRECISION` is
/// accepted only so that all executors share the same generic parameter list.
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "rdm"))]
fn default_executor_neon_rdm<const DST: u8, const PACKED: u8, const PRECISION: i32>(
    src: &[u8],
    dst: &mut [u8],
    premultiply_alpha: bool,
    ts: CbCrInverseTransform<i16>,
    bias_y: i16,
    bias_uv: i16,
    width: usize,
) {
    // SAFETY: `make_executor` hands this function out only after
    // `is_aarch64_feature_detected!("rdm")` succeeded.
    // NOTE(review): any slice-length requirements of the kernel are not
    // visible from this file - confirm against `crate::neon`.
    unsafe {
        crate::neon::neon_ayuv_to_rgba_rdm::<DST, PACKED>(
            src,
            dst,
            &ts,
            bias_y,
            bias_uv,
            width,
            premultiply_alpha,
        );
    }
}
164
/// AVX2 row kernel adapter, compiled for x86 / x86_64 when the crate's `avx`
/// feature is enabled.
///
/// Forwards the row to `crate::avx2::avx2_ayuv_to_rgba`. `PRECISION` is
/// accepted only so that all executors share the same generic parameter list.
///
/// # Safety
///
/// The function is compiled with AVX2 enabled, so the caller must make sure
/// the running CPU supports AVX2 (`make_executor` checks this with
/// `is_x86_feature_detected!("avx2")` before returning it).
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "avx"))]
#[target_feature(enable = "avx2")]
unsafe fn default_executor_avx2<const DST: u8, const PACKED: u8, const PRECISION: i32>(
    src: &[u8],
    dst: &mut [u8],
    premultiply_alpha: bool,
    ts: CbCrInverseTransform<i16>,
    bias_y: i16,
    bias_uv: i16,
    width: usize,
) {
    crate::avx2::avx2_ayuv_to_rgba::<DST, PACKED>(
        src,
        dst,
        &ts,
        bias_y,
        bias_uv,
        width,
        premultiply_alpha,
    );
}
179
/// SSE4.1 row kernel, compiled for x86 / x86_64 when the crate's `sse`
/// feature is enabled.
///
/// The body is the same scalar `cnv_exec!` loop as the portable kernel; the
/// only difference is that it is compiled with SSE4.1 enabled. The trailing
/// `usize` (row width) is ignored because the loop is driven by the slice
/// lengths.
///
/// # Safety
///
/// The caller must make sure the running CPU supports SSE4.1
/// (`make_executor` checks this with `is_x86_feature_detected!("sse4.1")`
/// before returning it).
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
#[target_feature(enable = "sse4.1")]
unsafe fn default_executor_sse<const DST: u8, const PACKED: u8, const PRECISION: i32>(
    src: &[u8],
    dst: &mut [u8],
    premultiply_alpha: bool,
    ts: CbCrInverseTransform<i16>,
    bias_y: i16,
    bias_uv: i16,
    _: usize,
) {
    // Decode the const-generic tags back into their layout descriptors.
    let target_layout: YuvSourceChannels = DST.into();
    let source_layout: YuvPacked444Format = PACKED.into();
    cnv_exec!(
        src,
        dst,
        premultiply_alpha,
        ts,
        bias_y,
        bias_uv,
        target_layout,
        source_layout
    );
}
195
/// Picks the row kernel for the current build target and running CPU.
///
/// Selection order:
/// * x86 / x86_64: the AVX2 kernel when the `avx` feature is compiled in and
///   AVX2 is detected at runtime; otherwise the SSE4.1 kernel when the `sse`
///   feature is compiled in and SSE4.1 is detected.
/// * aarch64 built with NEON: the RDM kernel when the `rdm` feature is
///   compiled in and RDM is detected at runtime; otherwise the NEON kernel.
/// * Every other case, including x86 when no check above succeeded: the
///   portable scalar `default_executor`.
///
/// The layout of the `#[cfg]`-gated statements matters: after cfg-stripping,
/// exactly one of the last two items is left as the function's tail expression.
fn make_executor<const DST: u8, const PACKED: u8, const PRECISION: i32>() -> RowExecutor {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        // Widest instruction set first; each check returns early on success.
        #[cfg(feature = "avx")]
        if std::arch::is_x86_feature_detected!("avx2") {
            return default_executor_avx2::<DST, PACKED, PRECISION>;
        }
        #[cfg(feature = "sse")]
        if std::arch::is_x86_feature_detected!("sse4.1") {
            return default_executor_sse::<DST, PACKED, PRECISION>;
        }
    }
    #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
    {
        #[cfg(feature = "rdm")]
        {
            if std::arch::is_aarch64_feature_detected!("rdm") {
                return default_executor_neon_rdm::<DST, PACKED, PRECISION>;
            }
        }
        // Tail expression of the function on aarch64 + NEON builds.
        default_executor_neon::<DST, PACKED, PRECISION>
    }
    // Tail expression of the function on all other builds.
    #[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
    default_executor::<DST, PACKED, PRECISION>
}
221
222fn ayuv_to_rgb_launch<const DST: u8, const PACKED: u8>(
223 image: &YuvPackedImage<u8>,
224 dst: &mut [u8],
225 dst_stride: usize,
226 range: YuvRange,
227 matrix: YuvStandardMatrix,
228 premultiply_alpha: bool,
229) -> Result<(), YuvError> {
230 let cn: YuvSourceChannels = DST.into();
231 image.check_constraints444()?;
232 check_rgba_destination(
233 dst,
234 dst_stride as u32,
235 image.width,
236 image.height,
237 cn.get_channels_count(),
238 )?;
239
240 let chroma_range = get_yuv_range(8, range);
241 let kr_kb = matrix.get_kr_kb();
242 let bias_y = chroma_range.bias_y as i16;
243 let bias_uv = chroma_range.bias_uv as i16;
244
245 const PRECISION: i32 = 13;
246
247 let ts =
248 search_inverse_transform(PRECISION, 8, range, matrix, chroma_range, kr_kb).cast::<i16>();
249
250 let iter;
251
252 #[cfg(not(feature = "rayon"))]
253 {
254 iter = image
255 .yuy
256 .chunks_exact(image.yuy_stride as usize)
257 .zip(dst.chunks_exact_mut(dst_stride));
258 }
259 #[cfg(feature = "rayon")]
260 {
261 iter = image
262 .yuy
263 .par_chunks_exact(image.yuy_stride as usize)
264 .zip(dst.par_chunks_exact_mut(dst_stride));
265 }
266
267 let mut _executor: RowExecutor = make_executor::<DST, PACKED, PRECISION>();
268
269 iter.for_each(|(src, dst)| {
270 let src = &src[0..image.width as usize * 4];
271 let dst = &mut dst[0..image.width as usize * cn.get_channels_count()];
272 unsafe {
273 _executor(
274 src,
275 dst,
276 premultiply_alpha,
277 ts,
278 bias_y,
279 bias_uv,
280 image.width as usize,
281 );
282 }
283 });
284
285 Ok(())
286}
287
/// Generates one public conversion function that forwards to
/// `ayuv_to_rgb_launch`.
///
/// Arguments, in order: the function name, the destination channel layout,
/// the packed source layout, and the source / destination format names used
/// in the generated documentation.
macro_rules! d_cnv {
    ($fn_name: ident, $dst_layout: expr, $src_layout: expr, $src_name: expr, $dst_name: expr) => {
        #[doc = concat!("Converts ", $src_name," to ", $dst_name," 8-bit depth precision.")]
        ///
        /// # Arguments
        ///
        /// * `image` - Packed source image.
        /// * `dst` - Destination buffer.
        /// * `dst_stride` - Distance between the starts of consecutive destination rows, in bytes.
        /// * `range` - YUV range used to derive the biases and the inverse transform.
        /// * `matrix` - YUV standard matrix used to derive the inverse transform.
        /// * `premultiply_alpha` - When `true`, colour channels are scaled by the pixel alpha.
        ///
        /// # Errors
        ///
        /// Returns a `YuvError` when the source image or the destination buffer
        /// fails validation.
        pub fn $fn_name(
            image: &YuvPackedImage<u8>,
            dst: &mut [u8],
            dst_stride: u32,
            range: YuvRange,
            matrix: YuvStandardMatrix,
            premultiply_alpha: bool,
        ) -> Result<(), YuvError> {
            // The layouts travel as `u8` const generics; the launcher decodes them.
            ayuv_to_rgb_launch::<{ $dst_layout as u8 }, { $src_layout as u8 }>(
                image,
                dst,
                dst_stride as usize,
                range,
                matrix,
                premultiply_alpha,
            )
        }
    };
}
310
// Public entry points generated by `d_cnv!`: one function per
// (packed source layout, destination layout) pair.
//
// NOTE(review): the two functions below are spelled `vyua_*` although the
// source layout is `Vuya`; the names are public API, so they are left as is.

// VUYA source, RGB destination.
d_cnv!(
    vyua_to_rgb,
    YuvSourceChannels::Rgb,
    YuvPacked444Format::Vuya,
    "VUYA",
    "RGB"
);
// VUYA source, RGBA destination.
d_cnv!(
    vyua_to_rgba,
    YuvSourceChannels::Rgba,
    YuvPacked444Format::Vuya,
    "VUYA",
    "RGBA"
);

// AYUV source, RGB destination.
d_cnv!(
    ayuv_to_rgb,
    YuvSourceChannels::Rgb,
    YuvPacked444Format::Ayuv,
    "AYUV",
    "RGB"
);
// AYUV source, RGBA destination.
d_cnv!(
    ayuv_to_rgba,
    YuvSourceChannels::Rgba,
    YuvPacked444Format::Ayuv,
    "AYUV",
    "RGBA"
);