#![forbid(unsafe_code)]
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
use crate::avx2::{avx_premultiply_alpha_rgba_f32, avx_unpremultiply_alpha_rgba_f32};
#[cfg(all(target_arch = "aarch64", feature = "neon",))]
use crate::neon::{neon_premultiply_alpha_rgba_f32, neon_unpremultiply_alpha_rgba_f32};
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
use crate::sse::{sse_premultiply_alpha_rgba_f32, sse_unpremultiply_alpha_rgba_f32};
use novtb::{ParallelZonedIterator, TbSliceMut};
/// Divides the colour channels of every RGBA pixel in `in_place` by its alpha.
///
/// The slice is interpreted as consecutive `[r, g, b, a]` groups of four `f32`
/// values. For each group with non-zero alpha the three colour channels are
/// multiplied by `1 / a`; when alpha compares equal to zero the colour channels
/// are set to `0.0`. Alpha itself is left as it was.
///
/// Trailing elements that do not form a complete group of four are not touched.
pub(crate) fn unpremultiply_rgba_f32_row(in_place: &mut [f32]) {
    let (pixels, _incomplete_tail) = in_place.as_chunks_mut::<4>();
    for [red, green, blue, alpha] in pixels {
        let a = *alpha;
        if a == 0. {
            // No coverage: the colour cannot be recovered, so clear it.
            *red = 0.;
            *green = 0.;
            *blue = 0.;
        } else {
            // One reciprocal shared by the three channels.
            let inv_alpha = 1. / a;
            *red *= inv_alpha;
            *green *= inv_alpha;
            *blue *= inv_alpha;
        }
    }
}
/// Divides the gray channel of every gray+alpha pixel in `in_place` by its alpha.
///
/// The slice is interpreted as consecutive `[gray, a]` pairs of `f32` values.
/// For non-zero alpha the gray value is multiplied by `1 / a`; when alpha
/// compares equal to zero the gray value is set to `0.0`. Alpha is unchanged.
///
/// A trailing element that does not form a complete pair is not touched.
pub(crate) fn unpremultiply_gray_alpha_f32_row(in_place: &mut [f32]) {
    let (pixels, _incomplete_tail) = in_place.as_chunks_mut::<2>();
    for [gray, alpha] in pixels {
        let a = *alpha;
        if a == 0. {
            // No coverage: clear the gray value.
            *gray = 0.;
        } else {
            let inv_alpha = 1. / a;
            *gray *= inv_alpha;
        }
    }
}
/// Writes the alpha-premultiplied form of the RGBA pixels in `src` into `dst`.
///
/// Both slices are interpreted as consecutive `[r, g, b, a]` groups of four
/// `f32` values. Each destination pixel receives `[r * a, g * a, b * a, a]`
/// computed from the source pixel at the same position.
///
/// Only as many pixels as both slices hold completely are processed; any
/// remaining elements of `dst` are left as they were.
pub(crate) fn premultiply_rgba_f32_row(dst: &mut [f32], src: &[f32]) {
    let (dst_pixels, _dst_tail) = dst.as_chunks_mut::<4>();
    let (src_pixels, _src_tail) = src.as_chunks::<4>();
    for (out, &[red, green, blue, alpha]) in dst_pixels.iter_mut().zip(src_pixels) {
        *out = [red * alpha, green * alpha, blue * alpha, alpha];
    }
}
/// Writes the alpha-premultiplied form of the gray+alpha pixels in `src` into `dst`.
///
/// Both slices are interpreted as consecutive `[gray, a]` pairs of `f32`
/// values. Each destination pixel receives `[gray * a, a]` computed from the
/// source pixel at the same position.
///
/// Only as many pixels as both slices hold completely are processed; any
/// remaining elements of `dst` are left as they were.
pub(crate) fn premultiply_gray_alpha_f32_row(dst: &mut [f32], src: &[f32]) {
    let (dst_pixels, _dst_tail) = dst.as_chunks_mut::<2>();
    let (src_pixels, _src_tail) = src.as_chunks::<2>();
    for (out, &[gray, alpha]) in dst_pixels.iter_mut().zip(src_pixels) {
        *out = [gray * alpha, alpha];
    }
}
/// Premultiplies a gray+alpha `f32` image row by row, distributing rows over `pool`.
///
/// `dst` is split into chunks of `dst_stride` elements and `src` into chunks of
/// `src_stride` elements; chunks are paired in order and the first `width * 2`
/// elements of each pair are handed to [`premultiply_gray_alpha_f32_row`].
///
/// The sixth (unnamed) parameter is accepted but not used here.
///
/// # Panics
///
/// Slicing panics if a paired chunk holds fewer than `width * 2` elements.
fn premultiply_alpha_gray_alpha_impl_f32(
    dst: &mut [f32],
    dst_stride: usize,
    src: &[f32],
    src_stride: usize,
    width: usize,
    _: usize,
    pool: &novtb::ThreadPool,
) {
    let dst_rows = dst.tb_par_chunks_mut(dst_stride);
    let src_rows = src.chunks(src_stride);
    dst_rows.zip(src_rows).for_each(pool, |(dst_row, src_row)| {
        // Two f32 values (gray, alpha) per pixel.
        let row_len = width * 2;
        premultiply_gray_alpha_f32_row(&mut dst_row[..row_len], &src_row[..row_len]);
    });
}
/// Un-premultiplies a gray+alpha `f32` image in place, distributing rows over `pool`.
///
/// `in_place` is split into chunks of `stride` elements and the first
/// `width * 2` elements of each chunk are handed to
/// [`unpremultiply_gray_alpha_f32_row`].
///
/// The fourth (unnamed) parameter is accepted but not used here.
///
/// # Panics
///
/// Slicing panics if a chunk holds fewer than `width * 2` elements.
fn unpremultiply_alpha_gray_alpha_impl_f32(
    in_place: &mut [f32],
    stride: usize,
    width: usize,
    _: usize,
    pool: &novtb::ThreadPool,
) {
    let rows = in_place.tb_par_chunks_mut(stride);
    rows.for_each(pool, |pixel_row| {
        // Two f32 values (gray, alpha) per pixel.
        let row_len = width * 2;
        unpremultiply_gray_alpha_f32_row(&mut pixel_row[..row_len]);
    });
}
/// Premultiplies an RGBA `f32` image row by row, distributing rows over `pool`.
///
/// A per-row kernel is chosen once up front and then applied to every row:
///
/// * the scalar [`premultiply_rgba_f32_row`] by default;
/// * the NEON kernel when built for `aarch64` with the `neon` feature;
/// * the SSE kernel when built for x86/x86_64 with the `sse` feature and
///   SSE4.1 is detected at runtime;
/// * the AVX kernel when built for x86_64 with the `avx` feature and AVX2 is
///   detected at runtime. It is assigned last, so it wins over SSE when both
///   are available.
///
/// `dst` is split into chunks of `dst_stride` elements and `src` into chunks of
/// `src_stride` elements; chunks are paired in order and the first `width * 4`
/// elements of each pair are passed to the kernel.
///
/// The sixth (unnamed) parameter is accepted but not used here.
///
/// # Panics
///
/// Slicing panics if a paired chunk holds fewer than `width * 4` elements.
pub(crate) fn premultiply_alpha_rgba_f32(
    dst: &mut [f32],
    dst_stride: usize,
    src: &[f32],
    src_stride: usize,
    width: usize,
    _: usize,
    pool: &novtb::ThreadPool,
) {
    // Leading underscore: on targets where no cfg block below is compiled the
    // binding is never reassigned.
    #[allow(clippy::type_complexity)]
    let mut _row_kernel: fn(&mut [f32], &[f32]) = premultiply_rgba_f32_row;

    #[cfg(all(target_arch = "aarch64", feature = "neon"))]
    {
        _row_kernel = neon_premultiply_alpha_rgba_f32;
    }

    #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
    {
        if std::arch::is_x86_feature_detected!("sse4.1") {
            _row_kernel = sse_premultiply_alpha_rgba_f32;
        }
    }

    // Checked after SSE so that AVX2 takes precedence when both are present.
    #[cfg(all(target_arch = "x86_64", feature = "avx"))]
    {
        if std::arch::is_x86_feature_detected!("avx2") {
            _row_kernel = avx_premultiply_alpha_rgba_f32;
        }
    }

    let dst_rows = dst.tb_par_chunks_mut(dst_stride);
    let src_rows = src.chunks(src_stride);
    dst_rows.zip(src_rows).for_each(pool, |(dst_row, src_row)| {
        // Four f32 values (r, g, b, a) per pixel.
        let row_len = width * 4;
        _row_kernel(&mut dst_row[..row_len], &src_row[..row_len]);
    });
}
pub(crate) fn premultiply_alpha_gray_alpha_f32(
dst: &mut [f32],
dst_stride: usize,
src: &[f32],
src_stride: usize,
width: usize,
height: usize,
pool: &novtb::ThreadPool,
) {
#[allow(clippy::type_complexity)]
let mut _dispatcher: fn(
&mut [f32],
usize,
&[f32],
usize,
usize,
usize,
&novtb::ThreadPool,
) = premultiply_alpha_gray_alpha_impl_f32;
_dispatcher(dst, dst_stride, src, src_stride, width, height, pool);
}
/// Un-premultiplies an RGBA `f32` image in place, distributing rows over `pool`.
///
/// A per-row kernel is chosen once up front and then applied to every row:
///
/// * the scalar [`unpremultiply_rgba_f32_row`] by default;
/// * the NEON kernel when built for `aarch64` with the `neon` feature;
/// * the SSE kernel when built for x86/x86_64 with the `sse` feature and
///   SSE4.1 is detected at runtime;
/// * the AVX kernel when built for x86_64 with the `avx` feature and AVX2 is
///   detected at runtime. It is assigned last, so it wins over SSE when both
///   are available.
///
/// `in_place` is split into chunks of `stride` elements and the first
/// `width * 4` elements of each chunk are passed to the kernel.
///
/// The fourth (unnamed) parameter is accepted but not used here.
///
/// # Panics
///
/// Slicing panics if a chunk holds fewer than `width * 4` elements.
pub(crate) fn unpremultiply_alpha_rgba_f32(
    in_place: &mut [f32],
    stride: usize,
    width: usize,
    _: usize,
    pool: &novtb::ThreadPool,
) {
    // Leading underscore: on targets where no cfg block below is compiled the
    // binding is never reassigned.
    let mut _row_kernel: fn(&mut [f32]) = unpremultiply_rgba_f32_row;

    #[cfg(all(target_arch = "aarch64", feature = "neon"))]
    {
        _row_kernel = neon_unpremultiply_alpha_rgba_f32;
    }

    #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "sse"))]
    {
        if std::arch::is_x86_feature_detected!("sse4.1") {
            _row_kernel = sse_unpremultiply_alpha_rgba_f32;
        }
    }

    // Checked after SSE so that AVX2 takes precedence when both are present.
    #[cfg(all(target_arch = "x86_64", feature = "avx"))]
    {
        if std::arch::is_x86_feature_detected!("avx2") {
            _row_kernel = avx_unpremultiply_alpha_rgba_f32;
        }
    }

    let rows = in_place.tb_par_chunks_mut(stride);
    rows.for_each(pool, |pixel_row| {
        // Four f32 values (r, g, b, a) per pixel.
        let row_len = width * 4;
        _row_kernel(&mut pixel_row[..row_len]);
    });
}
pub(crate) fn unpremultiply_alpha_gray_alpha_f32(
in_place: &mut [f32],
stride: usize,
width: usize,
height: usize,
pool: &novtb::ThreadPool,
) {
let mut _dispatcher: fn(&mut [f32], usize, usize, usize, &novtb::ThreadPool) =
unpremultiply_alpha_gray_alpha_impl_f32;
_dispatcher(in_place, stride, width, height, pool);
}