#![allow(dead_code)]
#[cfg(any(
target_arch = "aarch64",
target_arch = "x86_64",
target_arch = "wasm32"
))]
use crate::row::arch;
#[cfg(target_arch = "wasm32")]
use crate::row::simd128_available;
#[cfg(target_arch = "x86_64")]
use crate::row::{avx2_available, avx512_available, f16c_available, sse41_available};
#[cfg(target_arch = "aarch64")]
use crate::row::{fp16_available, neon_available};
use crate::{
ColorMatrix,
row::{
rgb_row_bytes, rgb_row_elems, rgba_row_bytes, rgba_row_elems,
scalar::{planar_gbr_f16 as scalar_f16, planar_gbr_float as scalar},
},
};
/// `true` when the crate is compiled for a big-endian target, `false` otherwise.
///
/// Passed as the `BE` argument to the `f32` kernels that consume buffers filled by
/// `widen_f16_be_to_host_f32`.
const HOST_NATIVE_BE: bool = cfg!(target_endian = "big");
use crate::row::scalar::planar_gbr_f16::widen_f16_be_to_host_f32;
/// Row dispatcher: planar G/B/R `f32` samples to RGB written into a `u8` buffer.
///
/// After validating the slice lengths, the row is handed to the first available SIMD
/// kernel for the compilation target when `use_simd` is set; in every other case the
/// scalar implementation does the work. `BE` is passed through unchanged to whichever
/// kernel runs.
///
/// # Panics
///
/// Panics when `g`, `b` or `r` holds fewer than `width` elements, or when `out` holds
/// fewer than `rgb_row_bytes(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrpf32_to_rgb_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    out: &mut [u8],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgb_row_bytes(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    // SAFETY: guarded by `avx512_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx512::gbrpf32_to_rgb_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if avx2_available() {
                    // SAFETY: guarded by `avx2_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx2::gbrpf32_to_rgb_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if sse41_available() {
                    // SAFETY: guarded by `sse41_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_sse41::gbrpf32_to_rgb_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrpf32_to_rgb_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf32_to_rgb_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar::gbrpf32_to_rgb_row::<BE>(g, b, r, out, width);
}
/// Row dispatcher: planar G/B/R `f32` samples to RGBA written into a `u8` buffer.
///
/// Slice lengths are validated first. With `use_simd` set, the first available SIMD
/// kernel for the compilation target handles the row; otherwise (or when none is
/// available) the scalar implementation runs. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when `g`, `b` or `r` holds fewer than `width` elements, or when `out` holds
/// fewer than `rgba_row_bytes(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrpf32_to_rgba_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    out: &mut [u8],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgba_row_bytes(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    // SAFETY: guarded by `avx512_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx512::gbrpf32_to_rgba_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if avx2_available() {
                    // SAFETY: guarded by `avx2_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx2::gbrpf32_to_rgba_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if sse41_available() {
                    // SAFETY: guarded by `sse41_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_sse41::gbrpf32_to_rgba_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrpf32_to_rgba_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf32_to_rgba_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar::gbrpf32_to_rgba_row::<BE>(g, b, r, out, width);
}
/// Row dispatcher: planar G/B/R `f32` samples to RGB written into a `u16` buffer.
///
/// Slice lengths are validated first. With `use_simd` set, the first available SIMD
/// kernel for the compilation target handles the row; otherwise (or when none is
/// available) the scalar implementation runs. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when `g`, `b` or `r` holds fewer than `width` elements, or when `out` holds
/// fewer than `rgb_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrpf32_to_rgb_u16_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    out: &mut [u16],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgb_row_elems(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    // SAFETY: guarded by `avx512_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx512::gbrpf32_to_rgb_u16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if avx2_available() {
                    // SAFETY: guarded by `avx2_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx2::gbrpf32_to_rgb_u16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if sse41_available() {
                    // SAFETY: guarded by `sse41_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_sse41::gbrpf32_to_rgb_u16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrpf32_to_rgb_u16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf32_to_rgb_u16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar::gbrpf32_to_rgb_u16_row::<BE>(g, b, r, out, width);
}
/// Row dispatcher: planar G/B/R `f32` samples to RGBA written into a `u16` buffer.
///
/// Slice lengths are validated first. With `use_simd` set, the first available SIMD
/// kernel for the compilation target handles the row; otherwise (or when none is
/// available) the scalar implementation runs. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when `g`, `b` or `r` holds fewer than `width` elements, or when `out` holds
/// fewer than `rgba_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrpf32_to_rgba_u16_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    out: &mut [u16],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgba_row_elems(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    // SAFETY: guarded by `avx512_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx512::gbrpf32_to_rgba_u16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if avx2_available() {
                    // SAFETY: guarded by `avx2_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx2::gbrpf32_to_rgba_u16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if sse41_available() {
                    // SAFETY: guarded by `sse41_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_sse41::gbrpf32_to_rgba_u16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrpf32_to_rgba_u16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf32_to_rgba_u16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar::gbrpf32_to_rgba_u16_row::<BE>(g, b, r, out, width);
}
/// Row dispatcher: planar G/B/R `f32` samples to RGB written into an `f32` buffer.
///
/// Slice lengths are validated first. The only SIMD kernel wired up here is the NEON
/// one (aarch64, and only when `use_simd` is set and `neon_available()` holds); every
/// other configuration uses the scalar implementation. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when `g`, `b` or `r` holds fewer than `width` elements, or when `out` holds
/// fewer than `rgb_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrpf32_to_rgb_f32_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    out: &mut [f32],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgb_row_elems(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrpf32_to_rgb_f32_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel exists/qualified for this target.
    scalar::gbrpf32_to_rgb_f32_row::<BE>(g, b, r, out, width);
}
/// Row dispatcher: planar G/B/R `f32` samples to RGBA written into an `f32` buffer.
///
/// Slice lengths are validated first. The only SIMD kernel wired up here is the NEON
/// one (aarch64, and only when `use_simd` is set and `neon_available()` holds); every
/// other configuration uses the scalar implementation. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when `g`, `b` or `r` holds fewer than `width` elements, or when `out` holds
/// fewer than `rgba_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrpf32_to_rgba_f32_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    out: &mut [f32],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgba_row_elems(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrpf32_to_rgba_f32_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel exists/qualified for this target.
    scalar::gbrpf32_to_rgba_f32_row::<BE>(g, b, r, out, width);
}
/// Row dispatcher: planar G/B/R `f32` samples to RGB written into a `half::f16` buffer.
///
/// Slice lengths are validated first. With `use_simd` set:
/// * aarch64 — the NEON kernel is used only when `fp16_available()` also holds;
///   NEON without FP16 goes straight to the scalar implementation.
/// * x86_64 — each of AVX-512 / AVX2 / SSE4.1 is used only together with
///   `f16c_available()`; a detected tier without F16C goes to the scalar implementation.
/// * wasm32 — the simd128 kernel is used when `simd128_available()` holds.
///
/// Everything else uses the scalar implementation. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when `g`, `b` or `r` holds fewer than `width` elements, or when `out` holds
/// fewer than `rgb_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrpf32_to_rgb_f16_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    out: &mut [half::f16],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgb_row_elems(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `avx512_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_avx512::gbrpf32_to_rgb_f16_row_f16c::<BE>(g, b, r, out, width);
                        }
                    } else {
                        scalar::gbrpf32_to_rgb_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if avx2_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `avx2_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_avx2::gbrpf32_to_rgb_f16_row_f16c::<BE>(g, b, r, out, width);
                        }
                    } else {
                        scalar::gbrpf32_to_rgb_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if sse41_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `sse41_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_sse41::gbrpf32_to_rgb_f16_row_f16c::<BE>(g, b, r, out, width);
                        }
                    } else {
                        scalar::gbrpf32_to_rgb_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    if fp16_available() {
                        // SAFETY: guarded by `neon_available()` and `fp16_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::neon::gbrpf32_to_rgb_f16_row_fp16::<BE>(g, b, r, out, width);
                        }
                    } else {
                        scalar::gbrpf32_to_rgb_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf32_to_rgb_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar::gbrpf32_to_rgb_f16_row::<BE>(g, b, r, out, width);
}
/// Row dispatcher: planar G/B/R `f32` samples to RGBA written into a `half::f16` buffer.
///
/// Slice lengths are validated first. With `use_simd` set:
/// * aarch64 — the NEON kernel is used only when `fp16_available()` also holds;
///   NEON without FP16 goes straight to the scalar implementation.
/// * x86_64 — each of AVX-512 / AVX2 / SSE4.1 is used only together with
///   `f16c_available()`; a detected tier without F16C goes to the scalar implementation.
/// * wasm32 — the simd128 kernel is used when `simd128_available()` holds.
///
/// Everything else uses the scalar implementation. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when `g`, `b` or `r` holds fewer than `width` elements, or when `out` holds
/// fewer than `rgba_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrpf32_to_rgba_f16_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    out: &mut [half::f16],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgba_row_elems(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `avx512_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_avx512::gbrpf32_to_rgba_f16_row_f16c::<BE>(g, b, r, out, width);
                        }
                    } else {
                        scalar::gbrpf32_to_rgba_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if avx2_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `avx2_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_avx2::gbrpf32_to_rgba_f16_row_f16c::<BE>(g, b, r, out, width);
                        }
                    } else {
                        scalar::gbrpf32_to_rgba_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if sse41_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `sse41_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_sse41::gbrpf32_to_rgba_f16_row_f16c::<BE>(g, b, r, out, width);
                        }
                    } else {
                        scalar::gbrpf32_to_rgba_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    if fp16_available() {
                        // SAFETY: guarded by `neon_available()` and `fp16_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::neon::gbrpf32_to_rgba_f16_row_fp16::<BE>(g, b, r, out, width);
                        }
                    } else {
                        scalar::gbrpf32_to_rgba_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf32_to_rgba_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar::gbrpf32_to_rgba_f16_row::<BE>(g, b, r, out, width);
}
/// Row dispatcher: planar G/B/R `f32` samples to a single luma plane in a `u8` buffer.
///
/// Slice lengths are validated first. With `use_simd` set, the first available SIMD
/// kernel for the compilation target handles the row; otherwise (or when none is
/// available) the scalar implementation runs. `BE`, `matrix` and `full_range` are
/// passed through unchanged to whichever kernel runs.
///
/// # Panics
///
/// Panics when any of `g`, `b`, `r` or `out` holds fewer than `width` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub(crate) fn gbrpf32_to_luma_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    out: &mut [u8],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
) {
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= width, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    // SAFETY: guarded by `avx512_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx512::gbrpf32_to_luma_row::<BE>(g, b, r, out, width, matrix, full_range);
                    }
                    return;
                } else if avx2_available() {
                    // SAFETY: guarded by `avx2_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx2::gbrpf32_to_luma_row::<BE>(g, b, r, out, width, matrix, full_range);
                    }
                    return;
                } else if sse41_available() {
                    // SAFETY: guarded by `sse41_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_sse41::gbrpf32_to_luma_row::<BE>(g, b, r, out, width, matrix, full_range);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrpf32_to_luma_row::<BE>(g, b, r, out, width, matrix, full_range);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf32_to_luma_row::<BE>(g, b, r, out, width, matrix, full_range);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar::gbrpf32_to_luma_row::<BE>(g, b, r, out, width, matrix, full_range);
}
/// Row dispatcher: planar G/B/R `f32` samples to a single luma plane in a `u16` buffer.
///
/// Slice lengths are validated first. With `use_simd` set, the first available SIMD
/// kernel for the compilation target handles the row; otherwise (or when none is
/// available) the scalar implementation runs. `BE`, `matrix` and `full_range` are
/// passed through unchanged to whichever kernel runs.
///
/// # Panics
///
/// Panics when any of `g`, `b`, `r` or `out` holds fewer than `width` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub(crate) fn gbrpf32_to_luma_u16_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    out: &mut [u16],
    width: usize,
    matrix: ColorMatrix,
    full_range: bool,
    use_simd: bool,
) {
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= width, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    // SAFETY: guarded by `avx512_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx512::gbrpf32_to_luma_u16_row::<BE>(g, b, r, out, width, matrix, full_range);
                    }
                    return;
                } else if avx2_available() {
                    // SAFETY: guarded by `avx2_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx2::gbrpf32_to_luma_u16_row::<BE>(g, b, r, out, width, matrix, full_range);
                    }
                    return;
                } else if sse41_available() {
                    // SAFETY: guarded by `sse41_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_sse41::gbrpf32_to_luma_u16_row::<BE>(g, b, r, out, width, matrix, full_range);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrpf32_to_luma_u16_row::<BE>(g, b, r, out, width, matrix, full_range);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf32_to_luma_u16_row::<BE>(g, b, r, out, width, matrix, full_range);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar::gbrpf32_to_luma_u16_row::<BE>(g, b, r, out, width, matrix, full_range);
}
/// Row dispatcher: planar G/B/R `f32` samples to three separate `u8` planes
/// (`h_out`, `s_out`, `v_out`).
///
/// Slice lengths are validated first. With `use_simd` set, the first available SIMD
/// kernel for the compilation target handles the row; otherwise (or when none is
/// available) the scalar implementation runs. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when any of the three input planes or the three output planes holds fewer
/// than `width` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
#[allow(clippy::too_many_arguments)]
pub(crate) fn gbrpf32_to_hsv_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    h_out: &mut [u8],
    s_out: &mut [u8],
    v_out: &mut [u8],
    width: usize,
    use_simd: bool,
) {
    // Inputs first, then outputs — the order decides which message a caller sees.
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(h_out.len() >= width, "h_out too short");
    assert!(s_out.len() >= width, "s_out too short");
    assert!(v_out.len() >= width, "v_out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    // SAFETY: guarded by `avx512_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx512::gbrpf32_to_hsv_row::<BE>(g, b, r, h_out, s_out, v_out, width);
                    }
                    return;
                } else if avx2_available() {
                    // SAFETY: guarded by `avx2_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx2::gbrpf32_to_hsv_row::<BE>(g, b, r, h_out, s_out, v_out, width);
                    }
                    return;
                } else if sse41_available() {
                    // SAFETY: guarded by `sse41_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_sse41::gbrpf32_to_hsv_row::<BE>(g, b, r, h_out, s_out, v_out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrpf32_to_hsv_row::<BE>(g, b, r, h_out, s_out, v_out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf32_to_hsv_row::<BE>(g, b, r, h_out, s_out, v_out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar::gbrpf32_to_hsv_row::<BE>(g, b, r, h_out, s_out, v_out, width);
}
/// Row dispatcher: planar G/B/R/A `f32` samples to RGBA written into a `u8` buffer.
///
/// Slice lengths are validated first. With `use_simd` set, the first available SIMD
/// kernel for the compilation target handles the row; otherwise (or when none is
/// available) the scalar implementation runs. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when `g`, `b`, `r` or `a` holds fewer than `width` elements, or when `out`
/// holds fewer than `rgba_row_bytes(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrapf32_to_rgba_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    a: &[f32],
    out: &mut [u8],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgba_row_bytes(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(a.len() >= width, "a row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    // SAFETY: guarded by `avx512_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx512::gbrapf32_to_rgba_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                } else if avx2_available() {
                    // SAFETY: guarded by `avx2_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx2::gbrapf32_to_rgba_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                } else if sse41_available() {
                    // SAFETY: guarded by `sse41_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_sse41::gbrapf32_to_rgba_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrapf32_to_rgba_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrapf32_to_rgba_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar::gbrapf32_to_rgba_row::<BE>(g, b, r, a, out, width);
}
/// Row dispatcher: planar G/B/R/A `f32` samples to RGBA written into a `u16` buffer.
///
/// Slice lengths are validated first. With `use_simd` set, the first available SIMD
/// kernel for the compilation target handles the row; otherwise (or when none is
/// available) the scalar implementation runs. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when `g`, `b`, `r` or `a` holds fewer than `width` elements, or when `out`
/// holds fewer than `rgba_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrapf32_to_rgba_u16_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    a: &[f32],
    out: &mut [u16],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgba_row_elems(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(a.len() >= width, "a row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    // SAFETY: guarded by `avx512_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx512::gbrapf32_to_rgba_u16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                } else if avx2_available() {
                    // SAFETY: guarded by `avx2_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx2::gbrapf32_to_rgba_u16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                } else if sse41_available() {
                    // SAFETY: guarded by `sse41_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_sse41::gbrapf32_to_rgba_u16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrapf32_to_rgba_u16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrapf32_to_rgba_u16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar::gbrapf32_to_rgba_u16_row::<BE>(g, b, r, a, out, width);
}
/// Row dispatcher: planar G/B/R/A `f32` samples to RGBA written into an `f32` buffer.
///
/// Slice lengths are validated first. The only SIMD kernel wired up here is the NEON
/// one (aarch64, and only when `use_simd` is set and `neon_available()` holds); every
/// other configuration uses the scalar implementation. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when `g`, `b`, `r` or `a` holds fewer than `width` elements, or when `out`
/// holds fewer than `rgba_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrapf32_to_rgba_f32_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    a: &[f32],
    out: &mut [f32],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgba_row_elems(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(a.len() >= width, "a row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrapf32_to_rgba_f32_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel exists/qualified for this target.
    scalar::gbrapf32_to_rgba_f32_row::<BE>(g, b, r, a, out, width);
}
/// Row dispatcher: planar G/B/R/A `f32` samples to RGBA written into a `half::f16` buffer.
///
/// Slice lengths are validated first. With `use_simd` set:
/// * aarch64 — the NEON kernel is used only when `fp16_available()` also holds;
///   NEON without FP16 goes straight to the scalar implementation.
/// * x86_64 — each of AVX-512 / AVX2 / SSE4.1 is used only together with
///   `f16c_available()`; a detected tier without F16C goes to the scalar implementation.
/// * wasm32 — the simd128 kernel is used when `simd128_available()` holds.
///
/// Everything else uses the scalar implementation. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when `g`, `b`, `r` or `a` holds fewer than `width` elements, or when `out`
/// holds fewer than `rgba_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrapf32_to_rgba_f16_row<const BE: bool>(
    g: &[f32],
    b: &[f32],
    r: &[f32],
    a: &[f32],
    out: &mut [half::f16],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgba_row_elems(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(a.len() >= width, "a row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `avx512_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_avx512::gbrapf32_to_rgba_f16_row_f16c::<BE>(g, b, r, a, out, width);
                        }
                    } else {
                        scalar::gbrapf32_to_rgba_f16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                } else if avx2_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `avx2_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_avx2::gbrapf32_to_rgba_f16_row_f16c::<BE>(g, b, r, a, out, width);
                        }
                    } else {
                        scalar::gbrapf32_to_rgba_f16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                } else if sse41_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `sse41_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_sse41::gbrapf32_to_rgba_f16_row_f16c::<BE>(g, b, r, a, out, width);
                        }
                    } else {
                        scalar::gbrapf32_to_rgba_f16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    if fp16_available() {
                        // SAFETY: guarded by `neon_available()` and `fp16_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::neon::gbrapf32_to_rgba_f16_row_fp16::<BE>(g, b, r, a, out, width);
                        }
                    } else {
                        scalar::gbrapf32_to_rgba_f16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrapf32_to_rgba_f16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar::gbrapf32_to_rgba_f16_row::<BE>(g, b, r, a, out, width);
}
/// Row dispatcher: planar G/B/R `half::f16` samples to RGB written into a `half::f16`
/// buffer.
///
/// Slice lengths are validated first. With `use_simd` set, the first available SIMD
/// kernel for the compilation target handles the row; otherwise (or when none is
/// available) the scalar `f16` implementation runs. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when `g`, `b` or `r` holds fewer than `width` elements, or when `out` holds
/// fewer than `rgb_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrpf16_to_rgb_f16_row<const BE: bool>(
    g: &[half::f16],
    b: &[half::f16],
    r: &[half::f16],
    out: &mut [half::f16],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgb_row_elems(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    // SAFETY: guarded by `avx512_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx512::gbrpf16_to_rgb_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if avx2_available() {
                    // SAFETY: guarded by `avx2_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx2::gbrpf16_to_rgb_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if sse41_available() {
                    // SAFETY: guarded by `sse41_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_sse41::gbrpf16_to_rgb_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrpf16_to_rgb_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf16_to_rgb_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar_f16::gbrpf16_to_rgb_f16_row::<BE>(g, b, r, out, width);
}
/// Row dispatcher: planar G/B/R `half::f16` samples to RGBA written into a `half::f16`
/// buffer.
///
/// Slice lengths are validated first. With `use_simd` set, the first available SIMD
/// kernel for the compilation target handles the row; otherwise (or when none is
/// available) the scalar `f16` implementation runs. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when `g`, `b` or `r` holds fewer than `width` elements, or when `out` holds
/// fewer than `rgba_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrpf16_to_rgba_f16_row<const BE: bool>(
    g: &[half::f16],
    b: &[half::f16],
    r: &[half::f16],
    out: &mut [half::f16],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgba_row_elems(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    // SAFETY: guarded by `avx512_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx512::gbrpf16_to_rgba_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if avx2_available() {
                    // SAFETY: guarded by `avx2_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx2::gbrpf16_to_rgba_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if sse41_available() {
                    // SAFETY: guarded by `sse41_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_sse41::gbrpf16_to_rgba_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrpf16_to_rgba_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf16_to_rgba_f16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar_f16::gbrpf16_to_rgba_f16_row::<BE>(g, b, r, out, width);
}
/// Row dispatcher: planar G/B/R/A `half::f16` samples to RGBA written into a
/// `half::f16` buffer.
///
/// Slice lengths are validated first. With `use_simd` set, the first available SIMD
/// kernel for the compilation target handles the row; otherwise (or when none is
/// available) the scalar `f16` implementation runs. `BE` is passed through unchanged.
///
/// # Panics
///
/// Panics when `g`, `b`, `r` or `a` holds fewer than `width` elements, or when `out`
/// holds fewer than `rgba_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrapf16_to_rgba_f16_row<const BE: bool>(
    g: &[half::f16],
    b: &[half::f16],
    r: &[half::f16],
    a: &[half::f16],
    out: &mut [half::f16],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgba_row_elems(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(a.len() >= width, "a row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    // SAFETY: guarded by `avx512_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx512::gbrapf16_to_rgba_f16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                } else if avx2_available() {
                    // SAFETY: guarded by `avx2_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx2::gbrapf16_to_rgba_f16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                } else if sse41_available() {
                    // SAFETY: guarded by `sse41_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::x86_sse41::gbrapf16_to_rgba_f16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() {
                    // SAFETY: guarded by `neon_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrapf16_to_rgba_f16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrapf16_to_rgba_f16_row::<BE>(g, b, r, a, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified on this machine.
    scalar_f16::gbrapf16_to_rgba_f16_row::<BE>(g, b, r, a, out, width);
}
/// Row dispatcher: planar G/B/R `half::f16` samples to RGB written into a `u16` buffer.
///
/// Slice lengths are validated first. With `use_simd` set, a direct `f16` SIMD kernel
/// is used when both the vector tier and its half-precision companion check pass
/// (NEON + FP16 on aarch64; AVX-512 / AVX2 / SSE4.1 + F16C on x86_64; simd128 alone on
/// wasm32). In every other case the row is widened with `widen_f16_be_to_host_f32` in
/// chunks of 64 pixels and each chunk goes through the scalar `f32` kernel.
///
/// # Panics
///
/// Panics when `g`, `b` or `r` holds fewer than `width` elements, or when `out` holds
/// fewer than `rgb_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrpf16_to_rgb_u16_row<const BE: bool>(
    g: &[half::f16],
    b: &[half::f16],
    r: &[half::f16],
    out: &mut [u16],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgb_row_elems(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() && f16c_available() {
                    // SAFETY: guarded by `avx512_available()` and `f16c_available()`;
                    // slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx512::gbrpf16_to_rgb_u16_row_f16c::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if avx2_available() && f16c_available() {
                    // SAFETY: guarded by `avx2_available()` and `f16c_available()`;
                    // slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx2::gbrpf16_to_rgb_u16_row_f16c::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if sse41_available() && f16c_available() {
                    // SAFETY: guarded by `sse41_available()` and `f16c_available()`;
                    // slice lengths were asserted above.
                    unsafe {
                        arch::x86_sse41::gbrpf16_to_rgb_u16_row_f16c::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() && fp16_available() {
                    // SAFETY: guarded by `neon_available()` and `fp16_available()`;
                    // slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrpf16_to_rgb_u16_row_fp16::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf16_to_rgb_u16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // Fallback: widen to `f32` on the stack, one fixed-size chunk at a time, and reuse
    // the scalar `f32` kernel on each chunk.
    const CHUNK: usize = 64;
    let mut g_wide = [0.0f32; CHUNK];
    let mut b_wide = [0.0f32; CHUNK];
    let mut r_wide = [0.0f32; CHUNK];
    for start in (0..width).step_by(CHUNK) {
        // Every chunk is full except possibly the last one.
        let len = CHUNK.min(width - start);
        widen_f16_be_to_host_f32::<BE>(g, start, &mut g_wide, len);
        widen_f16_be_to_host_f32::<BE>(b, start, &mut b_wide, len);
        widen_f16_be_to_host_f32::<BE>(r, start, &mut r_wide, len);
        // Three output elements per pixel.
        let dst = &mut out[start * 3..];
        scalar::gbrpf32_to_rgb_u16_row::<HOST_NATIVE_BE>(
            &g_wide[..len],
            &b_wide[..len],
            &r_wide[..len],
            dst,
            len,
        );
    }
}
/// Row dispatcher: planar G/B/R `half::f16` samples to RGBA written into a `u16` buffer.
///
/// Slice lengths are validated first. With `use_simd` set, a direct `f16` SIMD kernel
/// is used when both the vector tier and its half-precision companion check pass
/// (NEON + FP16 on aarch64; AVX-512 / AVX2 / SSE4.1 + F16C on x86_64; simd128 alone on
/// wasm32). In every other case the row is widened with `widen_f16_be_to_host_f32` in
/// chunks of 64 pixels and each chunk goes through the scalar `f32` kernel.
///
/// # Panics
///
/// Panics when `g`, `b` or `r` holds fewer than `width` elements, or when `out` holds
/// fewer than `rgba_row_elems(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrpf16_to_rgba_u16_row<const BE: bool>(
    g: &[half::f16],
    b: &[half::f16],
    r: &[half::f16],
    out: &mut [u16],
    width: usize,
    use_simd: bool,
) {
    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let needed = rgba_row_elems(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= needed, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() && f16c_available() {
                    // SAFETY: guarded by `avx512_available()` and `f16c_available()`;
                    // slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx512::gbrpf16_to_rgba_u16_row_f16c::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if avx2_available() && f16c_available() {
                    // SAFETY: guarded by `avx2_available()` and `f16c_available()`;
                    // slice lengths were asserted above.
                    unsafe {
                        arch::x86_avx2::gbrpf16_to_rgba_u16_row_f16c::<BE>(g, b, r, out, width);
                    }
                    return;
                } else if sse41_available() && f16c_available() {
                    // SAFETY: guarded by `sse41_available()` and `f16c_available()`;
                    // slice lengths were asserted above.
                    unsafe {
                        arch::x86_sse41::gbrpf16_to_rgba_u16_row_f16c::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "aarch64" => {
                if neon_available() && fp16_available() {
                    // SAFETY: guarded by `neon_available()` and `fp16_available()`;
                    // slice lengths were asserted above.
                    unsafe {
                        arch::neon::gbrpf16_to_rgba_u16_row_fp16::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf16_to_rgba_u16_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // Fallback: widen to `f32` on the stack, one fixed-size chunk at a time, and reuse
    // the scalar `f32` kernel on each chunk.
    const CHUNK: usize = 64;
    let mut g_wide = [0.0f32; CHUNK];
    let mut b_wide = [0.0f32; CHUNK];
    let mut r_wide = [0.0f32; CHUNK];
    for start in (0..width).step_by(CHUNK) {
        // Every chunk is full except possibly the last one.
        let len = CHUNK.min(width - start);
        widen_f16_be_to_host_f32::<BE>(g, start, &mut g_wide, len);
        widen_f16_be_to_host_f32::<BE>(b, start, &mut b_wide, len);
        widen_f16_be_to_host_f32::<BE>(r, start, &mut r_wide, len);
        // Four output elements per pixel.
        let dst = &mut out[start * 4..];
        scalar::gbrpf32_to_rgba_u16_row::<HOST_NATIVE_BE>(
            &g_wide[..len],
            &b_wide[..len],
            &r_wide[..len],
            dst,
            len,
        );
    }
}
/// Row dispatcher: planar G/B/R `half::f16` samples to RGB written into a `u8` buffer.
///
/// Slice lengths are validated first. With `use_simd` set:
/// * aarch64 with NEON — the direct `f16` kernel when `fp16_available()` holds;
///   otherwise the row is widened to `f32` in chunks and fed to the NEON `f32` kernel.
/// * x86_64 with AVX-512 / AVX2 / SSE4.1 (first match wins) — the direct `f16` kernel
///   when `f16c_available()` holds; otherwise the row is widened to `f32` in chunks and
///   fed to that tier's `f32` kernel.
/// * wasm32 with simd128 — the direct `f16` kernel.
///
/// In every other case the row is widened in chunks and fed to the scalar `f32` kernel.
/// Widened buffers are produced by `widen_f16_be_to_host_f32::<BE>` and consumed with
/// `HOST_NATIVE_BE` as the kernel's `BE` argument.
///
/// # Panics
///
/// Panics when `g`, `b` or `r` holds fewer than `width` elements, or when `out` holds
/// fewer than `rgb_row_bytes(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrpf16_to_rgb_row<const BE: bool>(
    g: &[half::f16],
    b: &[half::f16],
    r: &[half::f16],
    out: &mut [u8],
    width: usize,
    use_simd: bool,
) {
    // Shared widen-and-convert loop: widens up to 64 pixels of each plane into stack
    // buffers, then hands the chunk (plus the matching tail of `out`, three bytes per
    // pixel) to `convert`. Used by every path that lacks a direct f16 kernel.
    #[cfg_attr(not(tarpaulin), inline(always))]
    fn widen_in_chunks<const SRC_BE: bool>(
        g: &[half::f16],
        b: &[half::f16],
        r: &[half::f16],
        out: &mut [u8],
        width: usize,
        mut convert: impl FnMut(&[f32], &[f32], &[f32], &mut [u8], usize),
    ) {
        const CHUNK: usize = 64;
        let mut gf = [0.0f32; CHUNK];
        let mut bf = [0.0f32; CHUNK];
        let mut rf = [0.0f32; CHUNK];
        let mut offset = 0;
        while offset < width {
            // Every chunk is full except possibly the last one.
            let n = (width - offset).min(CHUNK);
            widen_f16_be_to_host_f32::<SRC_BE>(g, offset, &mut gf, n);
            widen_f16_be_to_host_f32::<SRC_BE>(b, offset, &mut bf, n);
            widen_f16_be_to_host_f32::<SRC_BE>(r, offset, &mut rf, n);
            convert(&gf[..n], &bf[..n], &rf[..n], &mut out[offset * 3..], n);
            offset += n;
        }
    }

    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let out_min = rgb_row_bytes(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= out_min, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    if fp16_available() {
                        // SAFETY: guarded by `neon_available()` and `fp16_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::neon::gbrpf16_to_rgb_row_fp16::<BE>(g, b, r, out, width);
                        }
                    } else {
                        widen_in_chunks::<BE>(g, b, r, out, width, |gf, bf, rf, dst, n| {
                            // SAFETY: guarded by `neon_available()`; the chunk slices hold
                            // exactly `n` samples and `dst` is the remaining output tail.
                            unsafe {
                                arch::neon::gbrpf32_to_rgb_row::<HOST_NATIVE_BE>(gf, bf, rf, dst, n);
                            }
                        });
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf16_to_rgb_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `avx512_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_avx512::gbrpf16_to_rgb_row_f16c::<BE>(g, b, r, out, width);
                        }
                    } else {
                        widen_in_chunks::<BE>(g, b, r, out, width, |gf, bf, rf, dst, n| {
                            // SAFETY: guarded by `avx512_available()`; the chunk slices hold
                            // exactly `n` samples and `dst` is the remaining output tail.
                            unsafe {
                                arch::x86_avx512::gbrpf32_to_rgb_row::<HOST_NATIVE_BE>(gf, bf, rf, dst, n);
                            }
                        });
                    }
                    return;
                }
                if avx2_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `avx2_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_avx2::gbrpf16_to_rgb_row_f16c::<BE>(g, b, r, out, width);
                        }
                    } else {
                        widen_in_chunks::<BE>(g, b, r, out, width, |gf, bf, rf, dst, n| {
                            // SAFETY: guarded by `avx2_available()`; the chunk slices hold
                            // exactly `n` samples and `dst` is the remaining output tail.
                            unsafe {
                                arch::x86_avx2::gbrpf32_to_rgb_row::<HOST_NATIVE_BE>(gf, bf, rf, dst, n);
                            }
                        });
                    }
                    return;
                }
                if sse41_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `sse41_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_sse41::gbrpf16_to_rgb_row_f16c::<BE>(g, b, r, out, width);
                        }
                    } else {
                        widen_in_chunks::<BE>(g, b, r, out, width, |gf, bf, rf, dst, n| {
                            // SAFETY: guarded by `sse41_available()`; the chunk slices hold
                            // exactly `n` samples and `dst` is the remaining output tail.
                            unsafe {
                                arch::x86_sse41::gbrpf32_to_rgb_row::<HOST_NATIVE_BE>(gf, bf, rf, dst, n);
                            }
                        });
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified: widen and use the scalar f32 kernel.
    widen_in_chunks::<BE>(g, b, r, out, width, |gf, bf, rf, dst, n| {
        scalar::gbrpf32_to_rgb_row::<HOST_NATIVE_BE>(gf, bf, rf, dst, n);
    });
}
/// Row dispatcher: planar G/B/R `half::f16` samples to RGBA written into a `u8` buffer.
///
/// Slice lengths are validated first. With `use_simd` set:
/// * aarch64 with NEON — the direct `f16` kernel when `fp16_available()` holds;
///   otherwise the row is widened to `f32` in chunks and fed to the NEON `f32` kernel.
/// * x86_64 with AVX-512 / AVX2 / SSE4.1 (first match wins) — the direct `f16` kernel
///   when `f16c_available()` holds; otherwise the row is widened to `f32` in chunks and
///   fed to that tier's `f32` kernel.
/// * wasm32 with simd128 — the direct `f16` kernel.
///
/// In every other case the row is widened in chunks and fed to the scalar `f32` kernel.
/// Widened buffers are produced by `widen_f16_be_to_host_f32::<BE>` and consumed with
/// `HOST_NATIVE_BE` as the kernel's `BE` argument.
///
/// # Panics
///
/// Panics when `g`, `b` or `r` holds fewer than `width` elements, or when `out` holds
/// fewer than `rgba_row_bytes(width)` elements.
#[cfg_attr(not(tarpaulin), inline(always))]
pub(crate) fn gbrpf16_to_rgba_row<const BE: bool>(
    g: &[half::f16],
    b: &[half::f16],
    r: &[half::f16],
    out: &mut [u8],
    width: usize,
    use_simd: bool,
) {
    // Shared widen-and-convert loop: widens up to 64 pixels of each plane into stack
    // buffers, then hands the chunk (plus the matching tail of `out`, four bytes per
    // pixel) to `convert`. Used by every path that lacks a direct f16 kernel.
    #[cfg_attr(not(tarpaulin), inline(always))]
    fn widen_in_chunks<const SRC_BE: bool>(
        g: &[half::f16],
        b: &[half::f16],
        r: &[half::f16],
        out: &mut [u8],
        width: usize,
        mut convert: impl FnMut(&[f32], &[f32], &[f32], &mut [u8], usize),
    ) {
        const CHUNK: usize = 64;
        let mut gf = [0.0f32; CHUNK];
        let mut bf = [0.0f32; CHUNK];
        let mut rf = [0.0f32; CHUNK];
        let mut offset = 0;
        while offset < width {
            // Every chunk is full except possibly the last one.
            let n = (width - offset).min(CHUNK);
            widen_f16_be_to_host_f32::<SRC_BE>(g, offset, &mut gf, n);
            widen_f16_be_to_host_f32::<SRC_BE>(b, offset, &mut bf, n);
            widen_f16_be_to_host_f32::<SRC_BE>(r, offset, &mut rf, n);
            convert(&gf[..n], &bf[..n], &rf[..n], &mut out[offset * 4..], n);
            offset += n;
        }
    }

    // Evaluated before the asserts so a failure inside the size helper surfaces first.
    let out_min = rgba_row_bytes(width);
    assert!(g.len() >= width, "g row too short");
    assert!(b.len() >= width, "b row too short");
    assert!(r.len() >= width, "r row too short");
    assert!(out.len() >= out_min, "out too short");

    if use_simd {
        cfg_select! {
            target_arch = "aarch64" => {
                if neon_available() {
                    if fp16_available() {
                        // SAFETY: guarded by `neon_available()` and `fp16_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::neon::gbrpf16_to_rgba_row_fp16::<BE>(g, b, r, out, width);
                        }
                    } else {
                        widen_in_chunks::<BE>(g, b, r, out, width, |gf, bf, rf, dst, n| {
                            // SAFETY: guarded by `neon_available()`; the chunk slices hold
                            // exactly `n` samples and `dst` is the remaining output tail.
                            unsafe {
                                arch::neon::gbrpf32_to_rgba_row::<HOST_NATIVE_BE>(gf, bf, rf, dst, n);
                            }
                        });
                    }
                    return;
                }
            },
            target_arch = "wasm32" => {
                if simd128_available() {
                    // SAFETY: guarded by `simd128_available()`; slice lengths were asserted above.
                    unsafe {
                        arch::wasm_simd128::gbrpf16_to_rgba_row::<BE>(g, b, r, out, width);
                    }
                    return;
                }
            },
            target_arch = "x86_64" => {
                // Probe from the widest vector extension down to the narrowest.
                if avx512_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `avx512_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_avx512::gbrpf16_to_rgba_row_f16c::<BE>(g, b, r, out, width);
                        }
                    } else {
                        widen_in_chunks::<BE>(g, b, r, out, width, |gf, bf, rf, dst, n| {
                            // SAFETY: guarded by `avx512_available()`; the chunk slices hold
                            // exactly `n` samples and `dst` is the remaining output tail.
                            unsafe {
                                arch::x86_avx512::gbrpf32_to_rgba_row::<HOST_NATIVE_BE>(gf, bf, rf, dst, n);
                            }
                        });
                    }
                    return;
                }
                if avx2_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `avx2_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_avx2::gbrpf16_to_rgba_row_f16c::<BE>(g, b, r, out, width);
                        }
                    } else {
                        widen_in_chunks::<BE>(g, b, r, out, width, |gf, bf, rf, dst, n| {
                            // SAFETY: guarded by `avx2_available()`; the chunk slices hold
                            // exactly `n` samples and `dst` is the remaining output tail.
                            unsafe {
                                arch::x86_avx2::gbrpf32_to_rgba_row::<HOST_NATIVE_BE>(gf, bf, rf, dst, n);
                            }
                        });
                    }
                    return;
                }
                if sse41_available() {
                    if f16c_available() {
                        // SAFETY: guarded by `sse41_available()` and `f16c_available()`;
                        // slice lengths were asserted above.
                        unsafe {
                            arch::x86_sse41::gbrpf16_to_rgba_row_f16c::<BE>(g, b, r, out, width);
                        }
                    } else {
                        widen_in_chunks::<BE>(g, b, r, out, width, |gf, bf, rf, dst, n| {
                            // SAFETY: guarded by `sse41_available()`; the chunk slices hold
                            // exactly `n` samples and `dst` is the remaining output tail.
                            unsafe {
                                arch::x86_sse41::gbrpf32_to_rgba_row::<HOST_NATIVE_BE>(gf, bf, rf, dst, n);
                            }
                        });
                    }
                    return;
                }
            },
            _ => {}
        }
    }

    // No SIMD requested, or no kernel qualified: widen and use the scalar f32 kernel.
    widen_in_chunks::<BE>(g, b, r, out, width, |gf, bf, rf, dst, n| {
        scalar::gbrpf32_to_rgba_row::<HOST_NATIVE_BE>(gf, bf, rf, dst, n);
    });
}
// Overflow regression tests; only built for 32-bit pointer targets with `std` enabled.
#[cfg(all(test, feature = "std", target_pointer_width = "32"))]
mod tests {
    use super::*;

    // Doubling this width already exceeds `usize::MAX`, so any 3x/4x row-size
    // computation on it cannot fit in `usize`. Each test expects the dispatcher to
    // panic with an "overflows usize" message before the "row too short" asserts.
    const HUGE_WIDTH: usize = usize::MAX / 2 + 1;

    // All input planes are empty; the overflow panic is expected to fire first.
    const NO_SAMPLES: [f32; 0] = [];

    #[test]
    #[should_panic(expected = "overflows usize")]
    fn gbrpf32_to_rgb_panics_on_width_overflow() {
        let mut sink = [0u8; 0];
        gbrpf32_to_rgb_row::<false>(
            &NO_SAMPLES,
            &NO_SAMPLES,
            &NO_SAMPLES,
            &mut sink,
            HUGE_WIDTH,
            false,
        );
    }

    #[test]
    #[should_panic(expected = "overflows usize")]
    fn gbrpf32_to_rgba_panics_on_width_overflow() {
        let mut sink = [0u8; 0];
        gbrpf32_to_rgba_row::<false>(
            &NO_SAMPLES,
            &NO_SAMPLES,
            &NO_SAMPLES,
            &mut sink,
            HUGE_WIDTH,
            false,
        );
    }

    #[test]
    #[should_panic(expected = "overflows usize")]
    fn gbrpf32_to_rgb_u16_panics_on_width_overflow() {
        let mut sink = [0u16; 0];
        gbrpf32_to_rgb_u16_row::<false>(
            &NO_SAMPLES,
            &NO_SAMPLES,
            &NO_SAMPLES,
            &mut sink,
            HUGE_WIDTH,
            false,
        );
    }

    #[test]
    #[should_panic(expected = "overflows usize")]
    fn gbrpf32_to_rgba_u16_panics_on_width_overflow() {
        let mut sink = [0u16; 0];
        gbrpf32_to_rgba_u16_row::<false>(
            &NO_SAMPLES,
            &NO_SAMPLES,
            &NO_SAMPLES,
            &mut sink,
            HUGE_WIDTH,
            false,
        );
    }
}