1#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
30use crate::neon::neon_y_to_rgb_alpha_row;
31use crate::numerics::qrshr;
32use crate::yuv_error::check_rgba_destination;
33use crate::yuv_support::*;
34use crate::{YuvError, YuvGrayAlphaImage};
35use num_traits::AsPrimitive;
36#[cfg(feature = "rayon")]
37use rayon::iter::{IndexedParallelIterator, ParallelIterator};
38#[cfg(feature = "rayon")]
39use rayon::prelude::{ParallelSlice, ParallelSliceMut};
40use std::fmt::Debug;
41use std::marker::PhantomData;
42
/// Per-sample-type dispatcher for the optional SIMD row kernels.
///
/// The struct stores no samples of type `V`; the type parameter only selects which
/// [`ProcessRowHandler`] implementation applies. Each flag exists only on the
/// architecture it is relevant for.
struct WideRowProcessor<V> {
    /// Ties the processor to its sample type without storing a value of it.
    _phantom: PhantomData<V>,
    /// Selects the `rdm` flavour of the NEON kernel when set.
    #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
    _use_rdm: bool,
    /// Enables the SSE row kernel when set.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    _use_sse: bool,
    /// Enables the AVX2 row kernel when set.
    #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "avx"))]
    _use_avx: bool,
}
52
53impl<V> Default for WideRowProcessor<V> {
54 fn default() -> Self {
55 WideRowProcessor {
56 _phantom: PhantomData,
57 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
58 _use_rdm: std::arch::is_aarch64_feature_detected!("rdm"),
59 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
60 _use_sse: std::arch::is_x86_feature_detected!("sse4.1"),
61 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "avx"))]
62 _use_avx: std::arch::is_x86_feature_detected!("avx2"),
63 }
64 }
65}
66
67trait ProcessRowHandler<V> {
68 fn handle_row<const PRECISION: i32, const DESTINATION_CHANNELS: u8>(
69 &self,
70 range: &YuvChromaRange,
71 transform: &CbCrInverseTransform<i32>,
72 y_plane: &[V],
73 a_plane: &[V],
74 rgba: &mut [V],
75 start_cx: usize,
76 width: usize,
77 ) -> usize;
78}
79
80impl ProcessRowHandler<u16> for WideRowProcessor<u16> {
81 fn handle_row<const PRECISION: i32, const DESTINATION_CHANNELS: u8>(
82 &self,
83 _range: &YuvChromaRange,
84 _transform: &CbCrInverseTransform<i32>,
85 _y_plane: &[u16],
86 _a_plane: &[u16],
87 _rgba: &mut [u16],
88 _start_cx: usize,
89 _width: usize,
90 ) -> usize {
91 0
92 }
93}
94
95impl ProcessRowHandler<u8> for WideRowProcessor<u8> {
96 fn handle_row<const PRECISION: i32, const DESTINATION_CHANNELS: u8>(
97 &self,
98 _range: &YuvChromaRange,
99 _transform: &CbCrInverseTransform<i32>,
100 _y_plane: &[u8],
101 _a_plane: &[u8],
102 _rgba: &mut [u8],
103 _start_cx: usize,
104 _width: usize,
105 ) -> usize {
106 let mut _cx = _start_cx;
107 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
108 unsafe {
109 let neon_wide_row_handler = if self._use_rdm {
110 #[cfg(feature = "rdm")]
111 {
112 use crate::neon::neon_y_to_rgb_row_alpha_rdm;
113 neon_y_to_rgb_row_alpha_rdm::<DESTINATION_CHANNELS>
114 }
115 #[cfg(not(feature = "rdm"))]
116 {
117 neon_y_to_rgb_alpha_row::<DESTINATION_CHANNELS>
118 }
119 } else {
120 neon_y_to_rgb_alpha_row::<DESTINATION_CHANNELS>
121 };
122
123 let offset =
124 neon_wide_row_handler(_range, _transform, _y_plane, _a_plane, _rgba, _cx, _width);
125 _cx = offset;
126 }
127 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "avx"))]
128 if self._use_avx {
129 use crate::avx2::avx2_y_to_rgba_alpha_row;
130 let offset = avx2_y_to_rgba_alpha_row::<DESTINATION_CHANNELS>(
131 _range, _transform, _y_plane, _a_plane, _rgba, _cx, _width,
132 );
133 _cx = offset;
134 }
135 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
136 if self._use_sse {
137 use crate::sse::sse_y_to_rgba_alpha_row;
138 let offset = sse_y_to_rgba_alpha_row::<DESTINATION_CHANNELS>(
139 _range, _transform, _y_plane, _a_plane, _rgba, _cx, _width,
140 );
141 _cx = offset;
142 }
143 _cx
144 }
145}
146
147#[inline]
149fn y_with_alpha_to_rgbx<
150 V: Copy + AsPrimitive<i16> + 'static + Send + Sync + Debug + Default + Clone,
151 const DESTINATION_CHANNELS: u8,
152 const BIT_DEPTH: usize,
153>(
154 image: &YuvGrayAlphaImage<V>,
155 rgba: &mut [V],
156 rgba_stride: u32,
157 range: YuvRange,
158 matrix: YuvStandardMatrix,
159) -> Result<(), YuvError>
160where
161 i32: AsPrimitive<V>,
162 WideRowProcessor<V>: ProcessRowHandler<V>,
163{
164 let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into();
165 let channels = destination_channels.get_channels_count();
166 assert!(
167 destination_channels.has_alpha(),
168 "YUV400 with alpha cannot be called on target image without alpha"
169 );
170 assert_eq!(
171 channels, 4,
172 "YUV400 with alpha cannot be called on target image without alpha"
173 );
174 assert!(
175 (8..=16).contains(&BIT_DEPTH),
176 "Invalid bit depth is provided"
177 );
178
179 check_rgba_destination(rgba, rgba_stride, image.width, image.height, channels)?;
180 image.check_constraints()?;
181
182 let chroma_range = get_yuv_range(BIT_DEPTH as u32, range);
183 let kr_kb = matrix.get_kr_kb();
184 const PRECISION: i32 = 13;
185 let inverse_transform =
186 search_inverse_transform(PRECISION, 8, range, matrix, chroma_range, kr_kb);
187 let y_coef = inverse_transform.y_coef as i16;
188 let bias_y = chroma_range.bias_y as i16;
189
190 let iter;
191 let y_iter;
192 let a_iter;
193 #[cfg(feature = "rayon")]
194 {
195 iter = rgba.par_chunks_exact_mut(rgba_stride as usize);
196 y_iter = image.y_plane.par_chunks_exact(image.y_stride as usize);
197 a_iter = image.a_plane.par_chunks_exact(image.a_stride as usize);
198 }
199 #[cfg(not(feature = "rayon"))]
200 {
201 iter = rgba.chunks_exact_mut(rgba_stride as usize);
202 y_iter = image.y_plane.chunks_exact(image.y_stride as usize);
203 a_iter = image.a_plane.chunks_exact(image.a_stride as usize);
204 }
205
206 if range == YuvRange::Limited {
207 let handler = WideRowProcessor::<V>::default();
208 iter.zip(y_iter)
209 .zip(a_iter)
210 .for_each(|((rgba, y_plane), a_plane)| {
211 let y_plane = &y_plane[0..image.width as usize];
212 let mut _cx = 0usize;
213
214 let offset = handler.handle_row::<PRECISION, DESTINATION_CHANNELS>(
215 &chroma_range,
216 &inverse_transform,
217 y_plane,
218 a_plane,
219 rgba,
220 _cx,
221 image.width as usize,
222 );
223 _cx = offset;
224
225 for ((y_src, a_src), rgba) in y_plane
226 .iter()
227 .zip(a_plane)
228 .zip(rgba.chunks_exact_mut(channels))
229 .skip(_cx)
230 {
231 let y_value = (y_src.as_() - bias_y) as i32 * y_coef as i32;
232
233 let r = qrshr::<PRECISION, BIT_DEPTH>(y_value);
234 rgba[destination_channels.get_r_channel_offset()] = r.as_();
235 rgba[destination_channels.get_g_channel_offset()] = r.as_();
236 rgba[destination_channels.get_b_channel_offset()] = r.as_();
237 rgba[destination_channels.get_a_channel_offset()] = *a_src;
238 }
239 });
240 } else {
241 iter.zip(y_iter)
242 .zip(a_iter)
243 .for_each(|((rgba, y_plane), a_plane)| {
244 let y_plane = &y_plane[0..image.width as usize];
245 for ((y_src, a_src), rgba) in y_plane
246 .iter()
247 .zip(a_plane)
248 .zip(rgba.chunks_exact_mut(channels))
249 {
250 let y_value = *y_src;
251 rgba[destination_channels.get_r_channel_offset()] = y_value;
252 rgba[destination_channels.get_g_channel_offset()] = y_value;
253 rgba[destination_channels.get_b_channel_offset()] = y_value;
254 rgba[destination_channels.get_a_channel_offset()] = *a_src;
255 }
256 });
257 }
258
259 Ok(())
260}
261
262pub fn yuv400_alpha_to_rgba(
281 gray_alpha_image: &YuvGrayAlphaImage<u8>,
282 rgba: &mut [u8],
283 rgba_stride: u32,
284 range: YuvRange,
285 matrix: YuvStandardMatrix,
286) -> Result<(), YuvError> {
287 y_with_alpha_to_rgbx::<u8, { YuvSourceChannels::Rgba as u8 }, 8>(
288 gray_alpha_image,
289 rgba,
290 rgba_stride,
291 range,
292 matrix,
293 )
294}
295
296pub fn yuv400_alpha_to_bgra(
315 gray_alpha_image: &YuvGrayAlphaImage<u8>,
316 bgra: &mut [u8],
317 bgra_stride: u32,
318 range: YuvRange,
319 matrix: YuvStandardMatrix,
320) -> Result<(), YuvError> {
321 y_with_alpha_to_rgbx::<u8, { YuvSourceChannels::Bgra as u8 }, 8>(
322 gray_alpha_image,
323 bgra,
324 bgra_stride,
325 range,
326 matrix,
327 )
328}