use crate::{DynRgbaReader, DynRgbaReaderSpec, formats::rgb::RgbaBlock};
use std::fmt::Debug;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub(crate) mod avx2;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub(crate) mod avx512;
#[cfg(target_arch = "aarch64")]
pub(crate) mod neon;
/// Common interface over a bundle of `f32` lanes, so numeric code can be written once and
/// run on every implementor.
///
/// This file implements it for plain `f32` (a single lane, `LEN == 1`).
/// NOTE(review): the arch-specific sibling modules (`avx2`, `avx512`, `neon`) presumably
/// provide the wide implementations — confirm there.
///
/// # Safety
///
/// NOTE(review): the invariants an implementor must uphold (the reason this is an
/// `unsafe trait`) are not spelled out in this file. Every method is an `unsafe fn`;
/// presumably callers must guarantee that the required CPU features are available and that
/// every raw pointer is valid for the accessed range — TODO confirm and document.
pub(crate) unsafe trait Vector: Debug + Copy + 'static {
    /// Number of `f32` lanes held by one value (`1` for the scalar `f32` implementation).
    const LEN: usize;

    /// Result type of the comparison methods, consumed by [`Vector::select`]
    /// (`bool` for the scalar `f32` implementation).
    type Mask;

    /// Builds a vector from the single scalar `v`.
    /// Presumably every lane is set to `v` for wide implementors — TODO confirm.
    unsafe fn splat(v: f32) -> Self;

    /// Addition: `self + other`.
    unsafe fn vadd(self, other: Self) -> Self;

    /// Scalar-operand form of [`Vector::vadd`]; equivalent to `self.vadd(Self::splat(other))`.
    unsafe fn vaddf(self, other: f32) -> Self {
        // SAFETY: the caller's obligations for this `unsafe fn` are forwarded unchanged to
        // `splat` and `vadd`.
        unsafe {
            let rhs = Self::splat(other);
            Self::vadd(self, rhs)
        }
    }

    /// Subtraction: `self - other`.
    unsafe fn vsub(self, other: Self) -> Self;

    /// Scalar-operand form of [`Vector::vsub`]; equivalent to `self.vsub(Self::splat(other))`.
    unsafe fn vsubf(self, other: f32) -> Self {
        // SAFETY: the caller's obligations for this `unsafe fn` are forwarded unchanged to
        // `splat` and `vsub`.
        unsafe {
            let rhs = Self::splat(other);
            Self::vsub(self, rhs)
        }
    }

    /// Multiplication: `self * other`.
    unsafe fn vmul(self, other: Self) -> Self;

    /// Scalar-operand form of [`Vector::vmul`]; equivalent to `self.vmul(Self::splat(other))`.
    unsafe fn vmulf(self, other: f32) -> Self {
        // SAFETY: the caller's obligations for this `unsafe fn` are forwarded unchanged to
        // `splat` and `vmul`.
        unsafe {
            let rhs = Self::splat(other);
            Self::vmul(self, rhs)
        }
    }

    /// Division: `self / other`.
    unsafe fn vdiv(self, other: Self) -> Self;

    /// Scalar-operand form of [`Vector::vdiv`]; equivalent to `self.vdiv(Self::splat(other))`.
    unsafe fn vdivf(self, other: f32) -> Self {
        // SAFETY: the caller's obligations for this `unsafe fn` are forwarded unchanged to
        // `splat` and `vdiv`.
        unsafe {
            let rhs = Self::splat(other);
            Self::vdiv(self, rhs)
        }
    }

    /// Maximum of `self` and `other`.
    /// NOTE(review): NaN handling is implementor-defined as far as this file shows; the
    /// scalar implementation uses `f32::max`.
    unsafe fn vmax(self, other: Self) -> Self;

    /// Scalar-operand form of [`Vector::vmax`]; equivalent to `self.vmax(Self::splat(other))`.
    unsafe fn vmaxf(self, other: f32) -> Self {
        // SAFETY: the caller's obligations for this `unsafe fn` are forwarded unchanged to
        // `splat` and `vmax`.
        unsafe {
            let rhs = Self::splat(other);
            Self::vmax(self, rhs)
        }
    }

    /// Comparison `self < other`, returned as a mask.
    unsafe fn lt(self, other: Self) -> Self::Mask;

    /// Scalar-operand form of [`Vector::lt`]; equivalent to `self.lt(Self::splat(other))`.
    unsafe fn ltf(self, other: f32) -> Self::Mask {
        // SAFETY: the caller's obligations for this `unsafe fn` are forwarded unchanged to
        // `splat` and `lt`.
        unsafe {
            let rhs = Self::splat(other);
            Self::lt(self, rhs)
        }
    }

    /// Comparison `self <= other`, returned as a mask.
    unsafe fn le(self, other: Self) -> Self::Mask;

    /// Scalar-operand form of [`Vector::le`]; equivalent to `self.le(Self::splat(other))`.
    unsafe fn lef(self, other: f32) -> Self::Mask {
        // SAFETY: the caller's obligations for this `unsafe fn` are forwarded unchanged to
        // `splat` and `le`.
        unsafe {
            let rhs = Self::splat(other);
            Self::le(self, rhs)
        }
    }

    /// Chooses between `a` and `b` according to `mask`.
    /// The scalar implementation yields `a` when the mask is `true` and `b` otherwise.
    unsafe fn select(a: Self, b: Self, mask: Self::Mask) -> Self;

    /// Square root.
    unsafe fn vsqrt(self) -> Self;

    /// Raises `self` to the power `pow`.
    unsafe fn vpow(self, pow: Self) -> Self;

    /// Scalar-exponent form of [`Vector::vpow`]; equivalent to `self.vpow(Self::splat(pow))`.
    unsafe fn vpowf(self, pow: f32) -> Self {
        // SAFETY: the caller's obligations for this `unsafe fn` are forwarded unchanged to
        // `splat` and `vpow`.
        unsafe {
            let exponent = Self::splat(pow);
            Self::vpow(self, exponent)
        }
    }

    /// Natural logarithm.
    unsafe fn vln(self) -> Self;

    /// Combines `self` and `other` into a pair of vectors.
    /// The scalar implementation returns the inputs unchanged; presumably wide implementors
    /// interleave lanes — TODO confirm.
    unsafe fn zip(self, other: Self) -> (Self, Self);

    /// Counterpart of [`Vector::zip`].
    /// The scalar implementation returns the inputs unchanged; presumably wide implementors
    /// de-interleave lanes — TODO confirm.
    unsafe fn unzip(self, other: Self) -> (Self, Self);

    /// Loads `u8` data starting at `ptr` and widens it to `f32`.
    /// The scalar implementation reads exactly one byte.
    unsafe fn load_u8(ptr: *const u8) -> Self;

    /// Loads `u16` data starting at the byte pointer `ptr` and widens it to `f32`.
    /// The scalar implementation performs one unaligned, native-endian `u16` read.
    unsafe fn load_u16(ptr: *const u8) -> Self;

    /// Loads two groups of three `u8` values each from `ptr`.
    /// The scalar implementation reads a `[[u8; 3]; 2]` (6 bytes) and widens every element.
    unsafe fn load_u8_3x_interleaved_2x(ptr: *const u8) -> [[Self; 3]; 2];

    /// Loads two groups of three `u16` values each from `ptr`.
    /// The scalar implementation reads a `[[u16; 3]; 2]` (12 bytes, unaligned) and widens
    /// every element.
    unsafe fn load_u16_3x_interleaved_2x(ptr: *const u8) -> [[Self; 3]; 2];

    /// Loads two groups of four `u8` values each from `ptr`.
    /// The scalar implementation reads a `[[u8; 4]; 2]` (8 bytes) and widens every element.
    unsafe fn load_u8_4x_interleaved_2x(ptr: *const u8) -> [[Self; 4]; 2];

    /// Loads two groups of four `u16` values each from `ptr`.
    /// The scalar implementation reads a `[[u16; 4]; 2]` (16 bytes, unaligned) and widens
    /// every element.
    unsafe fn load_u16_4x_interleaved_2x(ptr: *const u8) -> [[Self; 4]; 2];

    /// Converts `self` to `u8` and stores it at `ptr`.
    unsafe fn write_u8(self, ptr: *mut u8);

    /// Converts `v0` and `v1` to `u8` and stores them at `ptr`
    /// (the scalar implementation writes `[v0, v1]`, 2 bytes).
    unsafe fn write_u8_2x(v0: Self, v1: Self, ptr: *mut u8);

    /// Converts `self` to `u16` and stores it (unaligned in the scalar implementation) at `ptr`.
    unsafe fn write_u16(self, ptr: *mut u8);

    /// Converts `v0` and `v1` to `u16` and stores them at `ptr`
    /// (the scalar implementation writes `[v0, v1]`, 4 bytes).
    unsafe fn write_u16_2x(v0: Self, v1: Self, ptr: *mut u8);

    /// Stores two groups of three values as `u8` at `ptr`; counterpart of
    /// [`Vector::load_u8_3x_interleaved_2x`].
    unsafe fn write_interleaved_3x_2x_u8(this: [[Self; 3]; 2], ptr: *mut u8);

    /// Stores two groups of three values as `u16` at `ptr`; counterpart of
    /// [`Vector::load_u16_3x_interleaved_2x`].
    unsafe fn write_interleaved_3x_2x_u16(this: [[Self; 3]; 2], ptr: *mut u8);

    /// Stores two groups of four values as `u8` at `ptr`; counterpart of
    /// [`Vector::load_u8_4x_interleaved_2x`].
    unsafe fn write_interleaved_4x_2x_u8(this: [[Self; 4]; 2], ptr: *mut u8);

    /// Stores two groups of four values as `u16` at `ptr`; counterpart of
    /// [`Vector::load_u16_4x_interleaved_2x`].
    unsafe fn write_interleaved_4x_2x_u16(this: [[Self; 4]; 2], ptr: *mut u8);

    /// Reads an [`RgbaBlock`] of `Self` from the type-erased reader `v` at `(x, y)`.
    /// NOTE(review): the meaning and valid range of `x`/`y` are defined by the reader, not
    /// visible in this file.
    unsafe fn dyn_rgba_read<'a>(
        v: &mut (dyn DynRgbaReader + 'a),
        x: usize,
        y: usize,
    ) -> RgbaBlock<Self>;
}
/// Single-lane implementation: a plain `f32` acting as a vector of length 1.
///
/// Arithmetic maps directly onto the built-in `f32` operations; loads and stores go through
/// unaligned raw-pointer accesses in native byte order.
// NOTE(review): the implementor-side invariants of the `unsafe trait` are not documented in
// this file, so they cannot be restated here — TODO confirm.
unsafe impl Vector for f32 {
    const LEN: usize = 1;
    type Mask = bool;

    /// A one-lane splat is the value itself.
    #[inline(always)]
    unsafe fn splat(v: f32) -> Self {
        v
    }

    /// Plain `f32` addition.
    #[inline(always)]
    unsafe fn vadd(self, other: Self) -> Self {
        let sum = self + other;
        sum
    }

    /// Plain `f32` subtraction.
    #[inline(always)]
    unsafe fn vsub(self, other: Self) -> Self {
        let difference = self - other;
        difference
    }

    /// Plain `f32` multiplication.
    #[inline(always)]
    unsafe fn vmul(self, other: Self) -> Self {
        let product = self * other;
        product
    }

    /// Plain `f32` division.
    #[inline(always)]
    unsafe fn vdiv(self, other: Self) -> Self {
        let quotient = self / other;
        quotient
    }

    /// Larger of the two values, with `f32::max` semantics.
    #[inline(always)]
    unsafe fn vmax(self, other: Self) -> Self {
        f32::max(self, other)
    }

    /// `true` when `self < other` (always `false` if either operand is NaN).
    #[inline(always)]
    unsafe fn lt(self, other: Self) -> Self::Mask {
        // `other > self` is the same predicate as `self < other`, including for NaN.
        other > self
    }

    /// `true` when `self <= other` (always `false` if either operand is NaN).
    #[inline(always)]
    unsafe fn le(self, other: Self) -> Self::Mask {
        // `other >= self` is the same predicate as `self <= other`, including for NaN.
        other >= self
    }

    /// Returns `a` when `mask` is set, `b` otherwise.
    #[inline(always)]
    unsafe fn select(a: Self, b: Self, mask: Self::Mask) -> Self {
        match mask {
            true => a,
            false => b,
        }
    }

    /// Square root via `f32::sqrt`.
    #[inline(always)]
    unsafe fn vsqrt(self) -> Self {
        self.sqrt()
    }

    /// `self` raised to `pow` via `f32::powf`.
    #[inline(always)]
    unsafe fn vpow(self, pow: Self) -> Self {
        f32::powf(self, pow)
    }

    /// Natural logarithm via `f32::ln`.
    #[inline(always)]
    unsafe fn vln(self) -> Self {
        f32::ln(self)
    }

    /// With a single lane there is nothing to interleave: the inputs are returned as-is.
    #[inline(always)]
    unsafe fn zip(self, other: Self) -> (Self, Self) {
        let (first, second) = (self, other);
        (first, second)
    }

    /// With a single lane there is nothing to de-interleave: the inputs are returned as-is.
    #[inline(always)]
    unsafe fn unzip(self, other: Self) -> (Self, Self) {
        let (first, second) = (self, other);
        (first, second)
    }

    /// Reads one byte at `ptr` and widens it losslessly to `f32`.
    #[inline(always)]
    unsafe fn load_u8(ptr: *const u8) -> Self {
        // SAFETY: the caller guarantees `ptr` is valid for a 1-byte read.
        let raw: u8 = unsafe { ptr.read_unaligned() };
        f32::from(raw)
    }

    /// Reads one native-endian `u16` at `ptr` (no alignment required) and widens it
    /// losslessly to `f32`.
    #[inline(always)]
    unsafe fn load_u16(ptr: *const u8) -> Self {
        // SAFETY: the caller guarantees `ptr` is valid for a 2-byte read.
        let raw: u16 = unsafe { ptr.cast::<u16>().read_unaligned() };
        f32::from(raw)
    }

    /// Reads 2 groups of 3 consecutive `u8` values (6 bytes) and widens each to `f32`.
    #[inline(always)]
    unsafe fn load_u8_3x_interleaved_2x(ptr: *const u8) -> [[Self; 3]; 2] {
        // SAFETY: the caller guarantees `ptr` is valid for a 6-byte read.
        let groups: [[u8; 3]; 2] = unsafe { ptr.cast::<[[u8; 3]; 2]>().read_unaligned() };
        groups.map(|samples| samples.map(f32::from))
    }

    /// Reads 2 groups of 3 consecutive `u16` values (12 bytes, unaligned) and widens each
    /// to `f32`.
    #[inline(always)]
    unsafe fn load_u16_3x_interleaved_2x(ptr: *const u8) -> [[Self; 3]; 2] {
        // SAFETY: the caller guarantees `ptr` is valid for a 12-byte read.
        let groups: [[u16; 3]; 2] = unsafe { ptr.cast::<[[u16; 3]; 2]>().read_unaligned() };
        groups.map(|samples| samples.map(f32::from))
    }

    /// Reads 2 groups of 4 consecutive `u8` values (8 bytes) and widens each to `f32`.
    #[inline(always)]
    unsafe fn load_u8_4x_interleaved_2x(ptr: *const u8) -> [[Self; 4]; 2] {
        // SAFETY: the caller guarantees `ptr` is valid for an 8-byte read.
        let groups: [[u8; 4]; 2] = unsafe { ptr.cast::<[[u8; 4]; 2]>().read_unaligned() };
        groups.map(|samples| samples.map(f32::from))
    }

    /// Reads 2 groups of 4 consecutive `u16` values (16 bytes, unaligned) and widens each
    /// to `f32`.
    #[inline(always)]
    unsafe fn load_u16_4x_interleaved_2x(ptr: *const u8) -> [[Self; 4]; 2] {
        // SAFETY: the caller guarantees `ptr` is valid for a 16-byte read.
        let groups: [[u16; 4]; 2] = unsafe { ptr.cast::<[[u16; 4]; 2]>().read_unaligned() };
        groups.map(|samples| samples.map(f32::from))
    }

    /// Stores `self` as one `u8` at `ptr`.
    #[inline(always)]
    unsafe fn write_u8(self, ptr: *mut u8) {
        // `as` truncates toward zero, saturates at the `u8` bounds and maps NaN to 0.
        let byte = self as u8;
        // SAFETY: the caller guarantees `ptr` is valid for a 1-byte write.
        unsafe { ptr.write(byte) }
    }

    /// Stores `v0` then `v1` as two consecutive `u8` values at `ptr`.
    #[inline(always)]
    unsafe fn write_u8_2x(v0: Self, v1: Self, ptr: *mut u8) {
        // `as` truncates toward zero, saturates at the `u8` bounds and maps NaN to 0.
        let pair: [u8; 2] = [v0 as u8, v1 as u8];
        // SAFETY: the caller guarantees `ptr` is valid for a 2-byte write.
        unsafe { ptr.cast::<[u8; 2]>().write_unaligned(pair) };
    }

    /// Stores `self` as one native-endian `u16` at `ptr` (no alignment required).
    #[inline(always)]
    unsafe fn write_u16(self, ptr: *mut u8) {
        // `as` truncates toward zero, saturates at the `u16` bounds and maps NaN to 0.
        let word = self as u16;
        // SAFETY: the caller guarantees `ptr` is valid for a 2-byte write.
        unsafe { ptr.cast::<u16>().write_unaligned(word) };
    }

    /// Stores `v0` then `v1` as two consecutive native-endian `u16` values at `ptr`.
    #[inline(always)]
    unsafe fn write_u16_2x(v0: Self, v1: Self, ptr: *mut u8) {
        // `as` truncates toward zero, saturates at the `u16` bounds and maps NaN to 0.
        let pair: [u16; 2] = [v0 as u16, v1 as u16];
        // SAFETY: the caller guarantees `ptr` is valid for a 4-byte write.
        unsafe { ptr.cast::<[u16; 2]>().write_unaligned(pair) };
    }

    /// Narrows 2 groups of 3 values to `u8` and stores them consecutively (6 bytes) at `ptr`.
    #[inline(always)]
    unsafe fn write_interleaved_3x_2x_u8(this: [[Self; 3]; 2], ptr: *mut u8) {
        let packed: [[u8; 3]; 2] = this.map(|samples| samples.map(|sample| sample as u8));
        // SAFETY: the caller guarantees `ptr` is valid for a 6-byte write.
        unsafe { ptr.cast::<[[u8; 3]; 2]>().write_unaligned(packed) };
    }

    /// Narrows 2 groups of 3 values to `u16` and stores them consecutively (12 bytes) at `ptr`.
    #[inline(always)]
    unsafe fn write_interleaved_3x_2x_u16(this: [[Self; 3]; 2], ptr: *mut u8) {
        let packed: [[u16; 3]; 2] = this.map(|samples| samples.map(|sample| sample as u16));
        // SAFETY: the caller guarantees `ptr` is valid for a 12-byte write.
        unsafe { ptr.cast::<[[u16; 3]; 2]>().write_unaligned(packed) };
    }

    /// Narrows 2 groups of 4 values to `u8` and stores them consecutively (8 bytes) at `ptr`.
    #[inline(always)]
    unsafe fn write_interleaved_4x_2x_u8(this: [[Self; 4]; 2], ptr: *mut u8) {
        let packed: [[u8; 4]; 2] = this.map(|samples| samples.map(|sample| sample as u8));
        // SAFETY: the caller guarantees `ptr` is valid for an 8-byte write.
        unsafe { ptr.cast::<[[u8; 4]; 2]>().write_unaligned(packed) };
    }

    /// Narrows 2 groups of 4 values to `u16` and stores them consecutively (16 bytes) at `ptr`.
    #[inline(always)]
    unsafe fn write_interleaved_4x_2x_u16(this: [[Self; 4]; 2], ptr: *mut u8) {
        let packed: [[u16; 4]; 2] = this.map(|samples| samples.map(|sample| sample as u16));
        // SAFETY: the caller guarantees `ptr` is valid for a 16-byte write.
        unsafe { ptr.cast::<[[u16; 4]; 2]>().write_unaligned(packed) };
    }

    /// Forwards to the reader's `f32` specialisation (`DynRgbaReaderSpec<f32>`).
    #[inline(always)]
    unsafe fn dyn_rgba_read<'a>(
        v: &mut (dyn DynRgbaReader + 'a),
        x: usize,
        y: usize,
    ) -> RgbaBlock<Self> {
        // `Self` is `f32` here, so this names the same trait instantiation as spelling it out.
        DynRgbaReaderSpec::<Self>::dyn_read(v, x, y)
    }
}