1use crate::yuv_error::check_rgba_destination;
30use crate::YuvError;
31use core::f16;
32
/// Row converter from unsigned integer samples of type `V` to `f16` samples.
pub(crate) trait SurfaceToFloat16<V> {
    /// Converts the samples in `src` and stores the results in `dst`.
    ///
    /// `bit_depth` is the number of significant bits per source sample. The scalar
    /// implementation in this file multiplies every sample by `1 / (2^bit_depth - 1)`.
    fn to_float16(&self, src: &[V], dst: &mut [f16], bit_depth: usize);
}
36
/// Row converter from `f16` samples back to unsigned integer samples of type `V`.
pub(crate) trait SurfaceFloat16ToUnsigned<V> {
    /// Converts the samples in `src` and stores the results in `dst`.
    ///
    /// `bit_depth` is the number of significant bits per destination sample. The scalar
    /// implementation in this file multiplies every sample by `2^bit_depth - 1` and rounds.
    fn to_unsigned(&self, src: &[f16], dst: &mut [V], bit_depth: usize);
}
40
/// Factory that selects the row converters used for sample type `V`.
///
/// The implementations in this file choose between SIMD and scalar converters based on
/// the compilation target and on CPU features detected at runtime.
trait ConverterFactoryFloat16<V> {
    /// Returns the converter used for `V` -> `f16` at the given `bit_depth`.
    fn make_forward_converter(bit_depth: usize) -> Box<dyn SurfaceToFloat16<V>>;
    /// Returns the converter used for `f16` -> `V` at the given `bit_depth`.
    fn make_inverse_converter(bit_depth: usize) -> Box<dyn SurfaceFloat16ToUnsigned<V>>;
}
45
46impl ConverterFactoryFloat16<u8> for u8 {
47 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
48 fn make_forward_converter(bit_depth: usize) -> Box<dyn SurfaceToFloat16<u8>> {
49 use crate::neon::{SurfaceU8ToFloat16Neon, SurfaceU8ToFloat16NeonFallback};
50 if bit_depth <= 14 && std::arch::is_aarch64_feature_detected!("fp16") {
51 return Box::new(SurfaceU8ToFloat16Neon::default());
52 }
53 Box::new(SurfaceU8ToFloat16NeonFallback::default())
54 }
55
56 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
57 fn make_forward_converter(_bit_depth: usize) -> Box<dyn SurfaceToFloat16<u8>> {
58 #[cfg(feature = "avx")]
59 {
60 use crate::avx2::SurfaceU8ToFloat16Avx2;
61 if _bit_depth <= 14
62 && std::arch::is_x86_feature_detected!("avx2")
63 && std::arch::is_x86_feature_detected!("f16c")
64 {
65 return Box::new(SurfaceU8ToFloat16Avx2::default());
66 }
67 }
68 Box::new(CommonSurfaceToFloat16::<u8> {
69 _phantom: Default::default(),
70 })
71 }
72
73 #[cfg(not(any(
74 all(target_arch = "aarch64", target_feature = "neon"),
75 any(target_arch = "x86", target_arch = "x86_64")
76 )))]
77 fn make_forward_converter(_: usize) -> Box<dyn SurfaceToFloat16<u8>> {
78 Box::new(CommonSurfaceToFloat16::<u8> {
79 _phantom: Default::default(),
80 })
81 }
82
83 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
84 fn make_inverse_converter(bit_depth: usize) -> Box<dyn SurfaceFloat16ToUnsigned<u8>> {
85 use crate::neon::{SurfaceF16ToUnsigned8Neon, SurfaceF16ToUnsigned8NeonFallback};
86 if bit_depth <= 14 && std::arch::is_aarch64_feature_detected!("fp16") {
87 return Box::new(SurfaceF16ToUnsigned8Neon::default());
88 }
89 Box::new(SurfaceF16ToUnsigned8NeonFallback::default())
90 }
91
92 #[cfg(not(any(
93 all(target_arch = "aarch64", target_feature = "neon"),
94 any(target_arch = "x86", target_arch = "x86_64")
95 )))]
96 fn make_inverse_converter(_: usize) -> Box<dyn SurfaceFloat16ToUnsigned<u8>> {
97 Box::new(CommonSurfaceFloat16ToUnsigned::<u8> {
98 _phantom: Default::default(),
99 })
100 }
101
102 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
103 fn make_inverse_converter(_: usize) -> Box<dyn SurfaceFloat16ToUnsigned<u8>> {
104 Box::new(CommonSurfaceFloat16ToUnsigned::<u8> {
105 _phantom: Default::default(),
106 })
107 }
108}
109
110impl ConverterFactoryFloat16<u16> for u16 {
111 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
112 fn make_forward_converter(bit_depth: usize) -> Box<dyn SurfaceToFloat16<u16>> {
113 use crate::neon::{SurfaceU16ToFloat16Neon, SurfaceU16ToFloat16NeonFallback};
114 if bit_depth <= 14 && std::arch::is_aarch64_feature_detected!("fp16") {
115 return Box::new(SurfaceU16ToFloat16Neon::default());
116 }
117 Box::new(SurfaceU16ToFloat16NeonFallback::default())
118 }
119
120 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
121 fn make_forward_converter(_bit_depth: usize) -> Box<dyn SurfaceToFloat16<u16>> {
122 #[cfg(feature = "avx")]
123 {
124 use crate::avx2::SurfaceU16ToFloat16Avx2;
125 if _bit_depth <= 14
126 && std::arch::is_x86_feature_detected!("avx2")
127 && std::arch::is_x86_feature_detected!("f16c")
128 {
129 return Box::new(SurfaceU16ToFloat16Avx2::default());
130 }
131 }
132 Box::new(CommonSurfaceToFloat16::<u16> {
133 _phantom: Default::default(),
134 })
135 }
136
137 #[cfg(not(any(
138 all(target_arch = "aarch64", target_feature = "neon"),
139 any(target_arch = "x86", target_arch = "x86_64")
140 )))]
141 fn make_forward_converter(_: usize) -> Box<dyn SurfaceToFloat16<u16>> {
142 Box::new(CommonSurfaceToFloat16::<u16> {
143 _phantom: Default::default(),
144 })
145 }
146
147 #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
148 fn make_inverse_converter(bit_depth: usize) -> Box<dyn SurfaceFloat16ToUnsigned<u16>> {
149 use crate::neon::{SurfaceF16ToUnsigned16Neon, SurfaceF16ToUnsigned16NeonFallback};
150 if bit_depth <= 14 && std::arch::is_aarch64_feature_detected!("fp16") {
151 return Box::new(SurfaceF16ToUnsigned16Neon::default());
152 }
153 Box::new(SurfaceF16ToUnsigned16NeonFallback::default())
154 }
155
156 #[cfg(not(any(
157 all(target_arch = "aarch64", target_feature = "neon"),
158 any(target_arch = "x86", target_arch = "x86_64")
159 )))]
160 fn make_inverse_converter(_: usize) -> Box<dyn SurfaceFloat16ToUnsigned<u16>> {
161 Box::new(CommonSurfaceFloat16ToUnsigned::<u16> {
162 _phantom: Default::default(),
163 })
164 }
165
166 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
167 fn make_inverse_converter(_: usize) -> Box<dyn SurfaceFloat16ToUnsigned<u16>> {
168 Box::new(CommonSurfaceFloat16ToUnsigned::<u16> {
169 _phantom: Default::default(),
170 })
171 }
172}
173
174#[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
175struct CommonSurfaceToFloat16<V: num_traits::AsPrimitive<f32> + Copy> {
176 _phantom: std::marker::PhantomData<V>,
177}
178
179#[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
180impl<V: num_traits::AsPrimitive<f32> + Copy> SurfaceToFloat16<V> for CommonSurfaceToFloat16<V> {
181 fn to_float16(&self, src: &[V], dst: &mut [f16], bit_depth: usize) {
182 let scale_f32 = 1. / ((1 << (bit_depth)) - 1) as f32;
183 for (src, dst) in src.iter().zip(dst.iter_mut()) {
184 let src_f32 = src.as_();
185 *dst = (src_f32 * scale_f32) as f16;
186 }
187 }
188}
189
190#[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
191struct CommonSurfaceFloat16ToUnsigned<V: num_traits::AsPrimitive<f32> + Copy> {
192 _phantom: std::marker::PhantomData<V>,
193}
194
195#[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
196impl<V: num_traits::AsPrimitive<f32> + Copy> SurfaceFloat16ToUnsigned<V>
197 for CommonSurfaceFloat16ToUnsigned<V>
198where
199 f32: num_traits::AsPrimitive<V>,
200{
201 fn to_unsigned(&self, src: &[f16], dst: &mut [V], bit_depth: usize) {
202 use num_traits::AsPrimitive;
203 let scale_f32 = ((1 << (bit_depth)) - 1) as f32;
204 for (src, dst) in src.iter().zip(dst.iter_mut()) {
205 let src_f32 = (*src as f32 * scale_f32).round();
206 *dst = src_f32.as_();
207 }
208 }
209}
210
211fn convert_surface_to_f16<V: Copy + ConverterFactoryFloat16<V>, const CN: usize>(
212 src: &[V],
213 src_stride: usize,
214 dst: &mut [f16],
215 dst_stride: usize,
216 bit_depth: usize,
217 width: usize,
218 height: usize,
219) -> Result<(), YuvError> {
220 check_rgba_destination(src, src_stride as u32, width as u32, height as u32, CN)?;
221 check_rgba_destination(dst, dst_stride as u32, width as u32, height as u32, CN)?;
222
223 let converter = V::make_forward_converter(bit_depth);
224
225 for (src, dst) in src
226 .chunks_exact(src_stride)
227 .zip(dst.chunks_exact_mut(dst_stride))
228 {
229 let src = &src[0..width * CN];
230 let dst = &mut dst[0..width * CN];
231 converter.to_float16(src, dst, bit_depth);
232 }
233
234 Ok(())
235}
236
237fn convert_f16_surface_to_unsigned<V: Copy + ConverterFactoryFloat16<V>, const CN: usize>(
238 src: &[f16],
239 src_stride: usize,
240 dst: &mut [V],
241 dst_stride: usize,
242 bit_depth: usize,
243 width: usize,
244 height: usize,
245) -> Result<(), YuvError> {
246 check_rgba_destination(src, src_stride as u32, width as u32, height as u32, CN)?;
247 check_rgba_destination(dst, dst_stride as u32, width as u32, height as u32, CN)?;
248
249 let converter = V::make_inverse_converter(bit_depth);
250
251 for (src, dst) in src
252 .chunks_exact(src_stride)
253 .zip(dst.chunks_exact_mut(dst_stride))
254 {
255 let src = &src[0..width * CN];
256 let dst = &mut dst[0..width * CN];
257 converter.to_unsigned(src, dst, bit_depth);
258 }
259
260 Ok(())
261}
262
263pub fn convert_plane_to_f16(
276 src: &[u8],
277 src_stride: usize,
278 dst: &mut [f16],
279 dst_stride: usize,
280 width: usize,
281 height: usize,
282) -> Result<(), YuvError> {
283 convert_surface_to_f16::<u8, 1>(src, src_stride, dst, dst_stride, 8, width, height)
284}
285
286pub fn convert_rgba_to_f16(
301 src: &[u8],
302 src_stride: usize,
303 dst: &mut [f16],
304 dst_stride: usize,
305 width: usize,
306 height: usize,
307) -> Result<(), YuvError> {
308 convert_surface_to_f16::<u8, 4>(src, src_stride, dst, dst_stride, 8, width, height)
309}
310
311pub fn convert_rgb_to_f16(
326 src: &[u8],
327 src_stride: usize,
328 dst: &mut [f16],
329 dst_stride: usize,
330 width: usize,
331 height: usize,
332) -> Result<(), YuvError> {
333 convert_surface_to_f16::<u8, 3>(src, src_stride, dst, dst_stride, 8, width, height)
334}
335
336pub fn convert_plane16_to_f16(
350 src: &[u16],
351 src_stride: usize,
352 dst: &mut [f16],
353 dst_stride: usize,
354 bit_depth: usize,
355 width: usize,
356 height: usize,
357) -> Result<(), YuvError> {
358 convert_surface_to_f16::<u16, 1>(src, src_stride, dst, dst_stride, bit_depth, width, height)
359}
360
361pub fn convert_rgba16_to_f16(
377 src: &[u16],
378 src_stride: usize,
379 dst: &mut [f16],
380 dst_stride: usize,
381 bit_depth: usize,
382 width: usize,
383 height: usize,
384) -> Result<(), YuvError> {
385 convert_surface_to_f16::<u16, 4>(src, src_stride, dst, dst_stride, bit_depth, width, height)
386}
387
388pub fn convert_rgb16_to_f16(
404 src: &[u16],
405 src_stride: usize,
406 dst: &mut [f16],
407 dst_stride: usize,
408 bit_depth: usize,
409 width: usize,
410 height: usize,
411) -> Result<(), YuvError> {
412 convert_surface_to_f16::<u16, 3>(src, src_stride, dst, dst_stride, bit_depth, width, height)
413}
414
415pub fn convert_plane_f16_to_planar(
428 src: &[f16],
429 src_stride: usize,
430 dst: &mut [u8],
431 dst_stride: usize,
432 width: usize,
433 height: usize,
434) -> Result<(), YuvError> {
435 convert_f16_surface_to_unsigned::<u8, 1>(src, src_stride, dst, dst_stride, 8, width, height)
436}
437
438pub fn convert_rgb_f16_to_rgb(
451 src: &[f16],
452 src_stride: usize,
453 dst: &mut [u8],
454 dst_stride: usize,
455 width: usize,
456 height: usize,
457) -> Result<(), YuvError> {
458 convert_f16_surface_to_unsigned::<u8, 3>(src, src_stride, dst, dst_stride, 8, width, height)
459}
460
461pub fn convert_rgba_f16_to_rgba(
474 src: &[f16],
475 src_stride: usize,
476 dst: &mut [u8],
477 dst_stride: usize,
478 width: usize,
479 height: usize,
480) -> Result<(), YuvError> {
481 convert_f16_surface_to_unsigned::<u8, 4>(src, src_stride, dst, dst_stride, 8, width, height)
482}
483
484pub fn convert_plane_f16_to_planar16(
498 src: &[f16],
499 src_stride: usize,
500 dst: &mut [u16],
501 dst_stride: usize,
502 bit_depth: usize,
503 width: usize,
504 height: usize,
505) -> Result<(), YuvError> {
506 convert_f16_surface_to_unsigned::<u16, 1>(
507 src, src_stride, dst, dst_stride, bit_depth, width, height,
508 )
509}
510
511pub fn convert_rgb_f16_to_rgb16(
525 src: &[f16],
526 src_stride: usize,
527 dst: &mut [u16],
528 dst_stride: usize,
529 bit_depth: usize,
530 width: usize,
531 height: usize,
532) -> Result<(), YuvError> {
533 convert_f16_surface_to_unsigned::<u16, 3>(
534 src, src_stride, dst, dst_stride, bit_depth, width, height,
535 )
536}
537
538pub fn convert_rgba_f16_to_rgba16(
552 src: &[f16],
553 src_stride: usize,
554 dst: &mut [u16],
555 dst_stride: usize,
556 bit_depth: usize,
557 width: usize,
558 height: usize,
559) -> Result<(), YuvError> {
560 convert_f16_surface_to_unsigned::<u16, 4>(
561 src, src_stride, dst, dst_stride, bit_depth, width, height,
562 )
563}