yuvutils_rs/
f16_converter.rs

1/*
2 * Copyright (c) Radzivon Bartoshyk, 1/2025. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 *
7 * 1.  Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9 *
10 * 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 *
14 * 3.  Neither the name of the copyright holder nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::yuv_error::check_rgba_destination;
30use crate::YuvError;
31use core::f16;
32
33pub(crate) trait SurfaceToFloat16<V> {
34    fn to_float16(&self, src: &[V], dst: &mut [f16], bit_depth: usize);
35}
36
37pub(crate) trait SurfaceFloat16ToUnsigned<V> {
38    fn to_unsigned(&self, src: &[f16], dst: &mut [V], bit_depth: usize);
39}
40
41trait ConverterFactoryFloat16<V> {
42    fn make_forward_converter(bit_depth: usize) -> Box<dyn SurfaceToFloat16<V>>;
43    fn make_inverse_converter(bit_depth: usize) -> Box<dyn SurfaceFloat16ToUnsigned<V>>;
44}
45
46impl ConverterFactoryFloat16<u8> for u8 {
47    #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
48    fn make_forward_converter(bit_depth: usize) -> Box<dyn SurfaceToFloat16<u8>> {
49        use crate::neon::{SurfaceU8ToFloat16Neon, SurfaceU8ToFloat16NeonFallback};
50        if bit_depth <= 14 && std::arch::is_aarch64_feature_detected!("fp16") {
51            return Box::new(SurfaceU8ToFloat16Neon::default());
52        }
53        Box::new(SurfaceU8ToFloat16NeonFallback::default())
54    }
55
56    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
57    fn make_forward_converter(_bit_depth: usize) -> Box<dyn SurfaceToFloat16<u8>> {
58        #[cfg(feature = "avx")]
59        {
60            use crate::avx2::SurfaceU8ToFloat16Avx2;
61            if _bit_depth <= 14
62                && std::arch::is_x86_feature_detected!("avx2")
63                && std::arch::is_x86_feature_detected!("f16c")
64            {
65                return Box::new(SurfaceU8ToFloat16Avx2::default());
66            }
67        }
68        Box::new(CommonSurfaceToFloat16::<u8> {
69            _phantom: Default::default(),
70        })
71    }
72
73    #[cfg(not(any(
74        all(target_arch = "aarch64", target_feature = "neon"),
75        any(target_arch = "x86", target_arch = "x86_64")
76    )))]
77    fn make_forward_converter(_: usize) -> Box<dyn SurfaceToFloat16<u8>> {
78        Box::new(CommonSurfaceToFloat16::<u8> {
79            _phantom: Default::default(),
80        })
81    }
82
83    #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
84    fn make_inverse_converter(bit_depth: usize) -> Box<dyn SurfaceFloat16ToUnsigned<u8>> {
85        use crate::neon::{SurfaceF16ToUnsigned8Neon, SurfaceF16ToUnsigned8NeonFallback};
86        if bit_depth <= 14 && std::arch::is_aarch64_feature_detected!("fp16") {
87            return Box::new(SurfaceF16ToUnsigned8Neon::default());
88        }
89        Box::new(SurfaceF16ToUnsigned8NeonFallback::default())
90    }
91
92    #[cfg(not(any(
93        all(target_arch = "aarch64", target_feature = "neon"),
94        any(target_arch = "x86", target_arch = "x86_64")
95    )))]
96    fn make_inverse_converter(_: usize) -> Box<dyn SurfaceFloat16ToUnsigned<u8>> {
97        Box::new(CommonSurfaceFloat16ToUnsigned::<u8> {
98            _phantom: Default::default(),
99        })
100    }
101
102    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
103    fn make_inverse_converter(_: usize) -> Box<dyn SurfaceFloat16ToUnsigned<u8>> {
104        Box::new(CommonSurfaceFloat16ToUnsigned::<u8> {
105            _phantom: Default::default(),
106        })
107    }
108}
109
110impl ConverterFactoryFloat16<u16> for u16 {
111    #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
112    fn make_forward_converter(bit_depth: usize) -> Box<dyn SurfaceToFloat16<u16>> {
113        use crate::neon::{SurfaceU16ToFloat16Neon, SurfaceU16ToFloat16NeonFallback};
114        if bit_depth <= 14 && std::arch::is_aarch64_feature_detected!("fp16") {
115            return Box::new(SurfaceU16ToFloat16Neon::default());
116        }
117        Box::new(SurfaceU16ToFloat16NeonFallback::default())
118    }
119
120    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
121    fn make_forward_converter(_bit_depth: usize) -> Box<dyn SurfaceToFloat16<u16>> {
122        #[cfg(feature = "avx")]
123        {
124            use crate::avx2::SurfaceU16ToFloat16Avx2;
125            if _bit_depth <= 14
126                && std::arch::is_x86_feature_detected!("avx2")
127                && std::arch::is_x86_feature_detected!("f16c")
128            {
129                return Box::new(SurfaceU16ToFloat16Avx2::default());
130            }
131        }
132        Box::new(CommonSurfaceToFloat16::<u16> {
133            _phantom: Default::default(),
134        })
135    }
136
137    #[cfg(not(any(
138        all(target_arch = "aarch64", target_feature = "neon"),
139        any(target_arch = "x86", target_arch = "x86_64")
140    )))]
141    fn make_forward_converter(_: usize) -> Box<dyn SurfaceToFloat16<u16>> {
142        Box::new(CommonSurfaceToFloat16::<u16> {
143            _phantom: Default::default(),
144        })
145    }
146
147    #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
148    fn make_inverse_converter(bit_depth: usize) -> Box<dyn SurfaceFloat16ToUnsigned<u16>> {
149        use crate::neon::{SurfaceF16ToUnsigned16Neon, SurfaceF16ToUnsigned16NeonFallback};
150        if bit_depth <= 14 && std::arch::is_aarch64_feature_detected!("fp16") {
151            return Box::new(SurfaceF16ToUnsigned16Neon::default());
152        }
153        Box::new(SurfaceF16ToUnsigned16NeonFallback::default())
154    }
155
156    #[cfg(not(any(
157        all(target_arch = "aarch64", target_feature = "neon"),
158        any(target_arch = "x86", target_arch = "x86_64")
159    )))]
160    fn make_inverse_converter(_: usize) -> Box<dyn SurfaceFloat16ToUnsigned<u16>> {
161        Box::new(CommonSurfaceFloat16ToUnsigned::<u16> {
162            _phantom: Default::default(),
163        })
164    }
165
166    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
167    fn make_inverse_converter(_: usize) -> Box<dyn SurfaceFloat16ToUnsigned<u16>> {
168        Box::new(CommonSurfaceFloat16ToUnsigned::<u16> {
169            _phantom: Default::default(),
170        })
171    }
172}
173
174#[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
175struct CommonSurfaceToFloat16<V: num_traits::AsPrimitive<f32> + Copy> {
176    _phantom: std::marker::PhantomData<V>,
177}
178
179#[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
180impl<V: num_traits::AsPrimitive<f32> + Copy> SurfaceToFloat16<V> for CommonSurfaceToFloat16<V> {
181    fn to_float16(&self, src: &[V], dst: &mut [f16], bit_depth: usize) {
182        let scale_f32 = 1. / ((1 << (bit_depth)) - 1) as f32;
183        for (src, dst) in src.iter().zip(dst.iter_mut()) {
184            let src_f32 = src.as_();
185            *dst = (src_f32 * scale_f32) as f16;
186        }
187    }
188}
189
190#[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
191struct CommonSurfaceFloat16ToUnsigned<V: num_traits::AsPrimitive<f32> + Copy> {
192    _phantom: std::marker::PhantomData<V>,
193}
194
195#[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
196impl<V: num_traits::AsPrimitive<f32> + Copy> SurfaceFloat16ToUnsigned<V>
197    for CommonSurfaceFloat16ToUnsigned<V>
198where
199    f32: num_traits::AsPrimitive<V>,
200{
201    fn to_unsigned(&self, src: &[f16], dst: &mut [V], bit_depth: usize) {
202        use num_traits::AsPrimitive;
203        let scale_f32 = ((1 << (bit_depth)) - 1) as f32;
204        for (src, dst) in src.iter().zip(dst.iter_mut()) {
205            let src_f32 = (*src as f32 * scale_f32).round();
206            *dst = src_f32.as_();
207        }
208    }
209}
210
211fn convert_surface_to_f16<V: Copy + ConverterFactoryFloat16<V>, const CN: usize>(
212    src: &[V],
213    src_stride: usize,
214    dst: &mut [f16],
215    dst_stride: usize,
216    bit_depth: usize,
217    width: usize,
218    height: usize,
219) -> Result<(), YuvError> {
220    check_rgba_destination(src, src_stride as u32, width as u32, height as u32, CN)?;
221    check_rgba_destination(dst, dst_stride as u32, width as u32, height as u32, CN)?;
222
223    let converter = V::make_forward_converter(bit_depth);
224
225    for (src, dst) in src
226        .chunks_exact(src_stride)
227        .zip(dst.chunks_exact_mut(dst_stride))
228    {
229        let src = &src[0..width * CN];
230        let dst = &mut dst[0..width * CN];
231        converter.to_float16(src, dst, bit_depth);
232    }
233
234    Ok(())
235}
236
237fn convert_f16_surface_to_unsigned<V: Copy + ConverterFactoryFloat16<V>, const CN: usize>(
238    src: &[f16],
239    src_stride: usize,
240    dst: &mut [V],
241    dst_stride: usize,
242    bit_depth: usize,
243    width: usize,
244    height: usize,
245) -> Result<(), YuvError> {
246    check_rgba_destination(src, src_stride as u32, width as u32, height as u32, CN)?;
247    check_rgba_destination(dst, dst_stride as u32, width as u32, height as u32, CN)?;
248
249    let converter = V::make_inverse_converter(bit_depth);
250
251    for (src, dst) in src
252        .chunks_exact(src_stride)
253        .zip(dst.chunks_exact_mut(dst_stride))
254    {
255        let src = &src[0..width * CN];
256        let dst = &mut dst[0..width * CN];
257        converter.to_unsigned(src, dst, bit_depth);
258    }
259
260    Ok(())
261}
262
263/// Converts planar 8-bit image to `f16`.
264///
265/// # Arguments
266///
267/// * `src`: Source image
268/// * `src_stride`: Source image stride
269/// * `dst`: Destination image
270/// * `dst_stride`: Destination image stride
271/// * `width`: Image width
272/// * `height`: Image height
273///
274/// returns: Result<(), YuvError>
275pub fn convert_plane_to_f16(
276    src: &[u8],
277    src_stride: usize,
278    dst: &mut [f16],
279    dst_stride: usize,
280    width: usize,
281    height: usize,
282) -> Result<(), YuvError> {
283    convert_surface_to_f16::<u8, 1>(src, src_stride, dst, dst_stride, 8, width, height)
284}
285
286/// Converts RGBA 8-bit image to `f16`.
287///
288/// Channel order does not matter.
289///
290/// # Arguments
291///
292/// * `src`: Source image
293/// * `src_stride`: Source image stride
294/// * `dst`: Destination image
295/// * `dst_stride`: Destination image stride
296/// * `width`: Image width
297/// * `height`: Image height
298///
299/// returns: Result<(), YuvError>
300pub fn convert_rgba_to_f16(
301    src: &[u8],
302    src_stride: usize,
303    dst: &mut [f16],
304    dst_stride: usize,
305    width: usize,
306    height: usize,
307) -> Result<(), YuvError> {
308    convert_surface_to_f16::<u8, 4>(src, src_stride, dst, dst_stride, 8, width, height)
309}
310
311/// Converts RGB 8-bit image to `f16`.
312///
313/// Channel order does not matter.
314///
315/// # Arguments
316///
317/// * `src`: Source image
318/// * `src_stride`: Source image stride
319/// * `dst`: Destination image
320/// * `dst_stride`: Destination image stride
321/// * `width`: Image width
322/// * `height`: Image height
323///
324/// returns: Result<(), YuvError>
325pub fn convert_rgb_to_f16(
326    src: &[u8],
327    src_stride: usize,
328    dst: &mut [f16],
329    dst_stride: usize,
330    width: usize,
331    height: usize,
332) -> Result<(), YuvError> {
333    convert_surface_to_f16::<u8, 3>(src, src_stride, dst, dst_stride, 8, width, height)
334}
335
336/// Converts planar 8+ bit-depth image to `f16`.
337///
338/// # Arguments
339///
340/// * `src`: Source image
341/// * `src_stride`: Source image stride
342/// * `dst`: Destination image
343/// * `dst_stride`: Destination image stride
344/// * `bit_depth`: Image bit depth
345/// * `width`: Image width
346/// * `height`: Image height
347///
348/// returns: Result<(), YuvError>
349pub fn convert_plane16_to_f16(
350    src: &[u16],
351    src_stride: usize,
352    dst: &mut [f16],
353    dst_stride: usize,
354    bit_depth: usize,
355    width: usize,
356    height: usize,
357) -> Result<(), YuvError> {
358    convert_surface_to_f16::<u16, 1>(src, src_stride, dst, dst_stride, bit_depth, width, height)
359}
360
361/// Converts RGBA 8+ bit-depth image to `f16`.
362///
363/// Channel order does not matter.
364///
365/// # Arguments
366///
367/// * `src`: Source image
368/// * `src_stride`: Source image stride
369/// * `dst`: Destination image
370/// * `dst_stride`: Destination image stride
371/// * `bit_depth`: Image bit depth
372/// * `width`: Image width
373/// * `height`: Image height
374///
375/// returns: Result<(), YuvError>
376pub fn convert_rgba16_to_f16(
377    src: &[u16],
378    src_stride: usize,
379    dst: &mut [f16],
380    dst_stride: usize,
381    bit_depth: usize,
382    width: usize,
383    height: usize,
384) -> Result<(), YuvError> {
385    convert_surface_to_f16::<u16, 4>(src, src_stride, dst, dst_stride, bit_depth, width, height)
386}
387
388/// Converts RGB 8+ bit-depth image to `f16`.
389///
390/// Channel order does not matter.
391///
392/// # Arguments
393///
394/// * `src`: Source image
395/// * `src_stride`: Source image stride
396/// * `dst`: Destination image
397/// * `dst_stride`: Destination image stride
398/// * `bit_depth`: Image bit depth.
399/// * `width`: Image width
400/// * `height`: Image height
401///
402/// returns: Result<(), YuvError>
403pub fn convert_rgb16_to_f16(
404    src: &[u16],
405    src_stride: usize,
406    dst: &mut [f16],
407    dst_stride: usize,
408    bit_depth: usize,
409    width: usize,
410    height: usize,
411) -> Result<(), YuvError> {
412    convert_surface_to_f16::<u16, 3>(src, src_stride, dst, dst_stride, bit_depth, width, height)
413}
414
415/// Converts planar `f16` image to 8 bit-depth image.
416///
417/// # Arguments
418///
419/// * `src`: Source image
420/// * `src_stride`: Source image stride
421/// * `dst`: Destination image
422/// * `dst_stride`: Destination image stride
423/// * `width`: Image width
424/// * `height`: Image height
425///
426/// returns: Result<(), YuvError>
427pub fn convert_plane_f16_to_planar(
428    src: &[f16],
429    src_stride: usize,
430    dst: &mut [u8],
431    dst_stride: usize,
432    width: usize,
433    height: usize,
434) -> Result<(), YuvError> {
435    convert_f16_surface_to_unsigned::<u8, 1>(src, src_stride, dst, dst_stride, 8, width, height)
436}
437
438/// Converts RGB `f16` image to 8 bit-depth image.
439///
440/// # Arguments
441///
442/// * `src`: Source image
443/// * `src_stride`: Source image stride
444/// * `dst`: Destination image
445/// * `dst_stride`: Destination image stride
446/// * `width`: Image width
447/// * `height`: Image height
448///
449/// returns: Result<(), YuvError>
450pub fn convert_rgb_f16_to_rgb(
451    src: &[f16],
452    src_stride: usize,
453    dst: &mut [u8],
454    dst_stride: usize,
455    width: usize,
456    height: usize,
457) -> Result<(), YuvError> {
458    convert_f16_surface_to_unsigned::<u8, 3>(src, src_stride, dst, dst_stride, 8, width, height)
459}
460
461/// Converts RGBA `f16` image to 8 bit-depth image.
462///
463/// # Arguments
464///
465/// * `src`: Source image
466/// * `src_stride`: Source image stride
467/// * `dst`: Destination image
468/// * `dst_stride`: Destination image stride
469/// * `width`: Image width
470/// * `height`: Image height
471///
472/// returns: Result<(), YuvError>
473pub fn convert_rgba_f16_to_rgba(
474    src: &[f16],
475    src_stride: usize,
476    dst: &mut [u8],
477    dst_stride: usize,
478    width: usize,
479    height: usize,
480) -> Result<(), YuvError> {
481    convert_f16_surface_to_unsigned::<u8, 4>(src, src_stride, dst, dst_stride, 8, width, height)
482}
483
484/// Converts planar `f16` image to 8+ bit-depth image.
485///
486/// # Arguments
487///
488/// * `src`: Source image
489/// * `src_stride`: Source image stride
490/// * `dst`: Destination image
491/// * `dst_stride`: Destination image stride
492/// * `bit_depth`: Image bit depth
493/// * `width`: Image width
494/// * `height`: Image height
495///
496/// returns: Result<(), YuvError>
497pub fn convert_plane_f16_to_planar16(
498    src: &[f16],
499    src_stride: usize,
500    dst: &mut [u16],
501    dst_stride: usize,
502    bit_depth: usize,
503    width: usize,
504    height: usize,
505) -> Result<(), YuvError> {
506    convert_f16_surface_to_unsigned::<u16, 1>(
507        src, src_stride, dst, dst_stride, bit_depth, width, height,
508    )
509}
510
511/// Converts RGB `f16` image to 8+ bit-depth image.
512///
513/// # Arguments
514///
515/// * `src`: Source image
516/// * `src_stride`: Source image stride
517/// * `dst`: Destination image
518/// * `dst_stride`: Destination image stride
519/// * `bit_depth`: Image bit depth
520/// * `width`: Image width
521/// * `height`: Image height
522///
523/// returns: Result<(), YuvError>
524pub fn convert_rgb_f16_to_rgb16(
525    src: &[f16],
526    src_stride: usize,
527    dst: &mut [u16],
528    dst_stride: usize,
529    bit_depth: usize,
530    width: usize,
531    height: usize,
532) -> Result<(), YuvError> {
533    convert_f16_surface_to_unsigned::<u16, 3>(
534        src, src_stride, dst, dst_stride, bit_depth, width, height,
535    )
536}
537
538/// Converts RGBA `f16` image to 8+ bit-depth image.
539///
540/// # Arguments
541///
542/// * `src`: Source image
543/// * `src_stride`: Source image stride
544/// * `dst`: Destination image
545/// * `dst_stride`: Destination image stride
546/// * `bit_depth`: Image bit depth
547/// * `width`: Image width
548/// * `height`: Image height
549///
550/// returns: Result<(), YuvError>
551pub fn convert_rgba_f16_to_rgba16(
552    src: &[f16],
553    src_stride: usize,
554    dst: &mut [u16],
555    dst_stride: usize,
556    bit_depth: usize,
557    width: usize,
558    height: usize,
559) -> Result<(), YuvError> {
560    convert_f16_surface_to_unsigned::<u16, 4>(
561        src, src_stride, dst, dst_stride, bit_depth, width, height,
562    )
563}