#![warn(missing_docs)]
use core::slice::ChunksExactMut;
/// Implementations that [`unfilter_lines`] starts from before any SIMD override is applied.
pub mod fallbacks;
/// NEON implementations; only compiled for `aarch64` targets.
#[cfg(target_arch = "aarch64")]
pub mod neon;
/// SSE2 implementations; only compiled for `x86` / `x86_64` targets.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub mod sse2;
/// SSE4.1 implementations; only compiled for `x86` / `x86_64` targets.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub mod sse4_1;
/// Reverses the per-scanline filters of an image, in place.
///
/// Every chunk yielded by `lines` is treated as one scanline: the first byte is
/// the filter type and the remaining bytes are the filtered samples. The
/// samples are reconstructed in place and the filter byte is then reset to `0`.
///
/// Filter types are dispatched as follows:
///
/// | value | reconstruction                         |
/// |-------|----------------------------------------|
/// | `1`   | Sub                                    |
/// | `2`   | Up                                     |
/// | `3`   | Average                                |
/// | `4`   | Paeth                                  |
/// | other | samples left untouched                 |
///
/// The first scanline has no predecessor, so it is reconstructed as if the
/// previous row were all zeros: Up becomes a no-op, Average uses the dedicated
/// "top" variant, and Paeth degenerates to Sub (with `b = c = 0` the Paeth
/// predictor always selects the left neighbour).
///
/// `BYTES_PER_PIXEL` is forwarded to the reconstruction routines and also
/// decides which SIMD implementations are eligible on the current target.
///
/// An empty `lines` iterator is accepted and does nothing.
#[inline]
// The function-pointer bindings are only reassigned on targets that have a
// SIMD override block below; elsewhere `mut` would be reported as unused.
#[allow(unused_mut)]
pub fn unfilter_lines<const BYTES_PER_PIXEL: usize>(lines: ChunksExactMut<'_, u8>) {
    // Start from the portable implementations.
    let mut sub: unsafe fn(&mut [u8]) = fallbacks::recon_sub::<BYTES_PER_PIXEL>;
    let mut up: unsafe fn(&mut [u8], &[u8]) = fallbacks::recon_up;
    let mut average: unsafe fn(&mut [u8], &[u8]) = fallbacks::recon_average::<BYTES_PER_PIXEL>;
    let mut average_top: unsafe fn(&mut [u8]) = fallbacks::recon_average_top::<BYTES_PER_PIXEL>;
    let mut paeth: unsafe fn(&mut [u8], &[u8]) = fallbacks::recon_paeth::<BYTES_PER_PIXEL>;

    // x86 / x86_64: a SIMD routine is only chosen once the matching CPU feature
    // has been detected at run time and the pixel size meets the per-filter
    // threshold below.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        let has_sse4_1 = is_x86_feature_detected!("sse4.1");
        let has_sse2 = is_x86_feature_detected!("sse2");

        if BYTES_PER_PIXEL >= 8 && has_sse4_1 {
            average = sse4_1::recon_average::<BYTES_PER_PIXEL>;
            average_top = sse4_1::recon_average_top::<BYTES_PER_PIXEL>;
        } else if BYTES_PER_PIXEL >= 8 && has_sse2 {
            average = sse2::recon_average::<BYTES_PER_PIXEL>;
            average_top = sse2::recon_average_top::<BYTES_PER_PIXEL>;
        }

        if BYTES_PER_PIXEL >= 3 && has_sse4_1 {
            paeth = sse4_1::recon_paeth::<BYTES_PER_PIXEL>;
        } else if BYTES_PER_PIXEL >= 3 && has_sse2 {
            paeth = sse2::recon_paeth::<BYTES_PER_PIXEL>;
        }

        if BYTES_PER_PIXEL >= 4 && has_sse2 {
            sub = sse2::recon_sub::<BYTES_PER_PIXEL>;
            up = sse2::recon_up;
        }
    }

    // aarch64: NEON `up` is used whenever NEON is detected; the other routines
    // are only swapped in for 2-byte pixels or pixels of 4 bytes and more.
    #[cfg(target_arch = "aarch64")]
    {
        let has_neon = std::arch::is_aarch64_feature_detected!("neon");

        if (BYTES_PER_PIXEL == 2 || BYTES_PER_PIXEL >= 4) && has_neon {
            paeth = neon::recon_paeth::<BYTES_PER_PIXEL>;
            sub = neon::recon_sub::<BYTES_PER_PIXEL>;
            average = neon::recon_average::<BYTES_PER_PIXEL>;
            average_top = neon::recon_average_top::<BYTES_PER_PIXEL>;
        }
        if has_neon {
            up = neon::recon_up;
        }
    }

    // Split every scanline into (filter byte, sample bytes).
    // `chunks_exact_mut` refuses a chunk size of zero, so a chunk always has
    // at least one byte.
    let mut lines = lines.map(|line| {
        line.split_first_mut()
            .expect("a scanline chunk always contains at least the filter byte")
    });

    // First scanline: reconstruct against an implicit all-zero previous row.
    let mut previous: &[u8] = match lines.next() {
        Some((filter, line)) => {
            match *filter {
                // Paeth with a zero previous row predicts the left neighbour,
                // which is exactly what Sub does.
                //
                // SAFETY: SIMD variants were only selected after run-time
                // feature detection succeeded. NOTE(review): any further
                // preconditions of the `recon_*` routines live in their
                // modules and are not visible here.
                1 | 4 => unsafe { sub(line) },
                // SAFETY: see above.
                3 => unsafe { average_top(line) },
                // Up adds zeros; unknown filter types are left untouched.
                _ => (),
            }
            *filter = 0;
            line
        }
        None => return,
    };

    // Remaining scanlines: each one is reconstructed against the already
    // reconstructed row directly above it.
    for (filter, line) in lines {
        // SAFETY (all arms): SIMD variants were only selected after run-time
        // feature detection succeeded; `previous` and `line` come from the
        // same `ChunksExactMut`, so they have equal length.
        match *filter {
            1 => unsafe { sub(line) },
            2 => unsafe { up(line, previous) },
            3 => unsafe { average(line, previous) },
            4 => unsafe { paeth(line, previous) },
            _ => (),
        }
        *filter = 0;
        previous = line;
    }
}