use core::arch::wasm32::*;
#[allow(unused_imports)]
pub(super) use crate::{ColorMatrix, row::scalar};
#[cfg(any(feature = "gbr", feature = "yuv-444-packed", feature = "yuva"))]
pub(crate) mod alpha_extract;
#[cfg(feature = "yuv-444-packed")]
mod ayuv64;
pub(crate) mod endian;
#[cfg(feature = "gray")]
mod gray;
mod hsv;
#[cfg(feature = "rgb-legacy")]
pub(crate) mod legacy_rgb;
#[cfg(feature = "mono")]
mod mono1bit;
#[cfg(feature = "rgb")]
mod packed_rgb;
#[cfg(feature = "rgb")]
mod packed_rgb_16bit;
#[cfg(feature = "rgb-float")]
mod packed_rgb_float;
#[cfg(feature = "yuv-packed")]
mod packed_yuv_4_1_1;
#[cfg(feature = "yuv-packed")]
mod packed_yuv_8bit;
#[cfg(feature = "gbr")]
mod planar_gbr;
#[cfg(feature = "gbr")]
mod planar_gbr_float;
#[cfg(feature = "gbr")]
mod planar_gbr_high_bit;
#[cfg(feature = "yuv-semi-planar")]
mod semi_planar_8bit;
#[cfg(all(feature = "yuv-planar", feature = "yuv-semi-planar"))]
mod subsampled_high_bit_pn_4_2_0;
#[cfg(feature = "yuv-semi-planar")]
mod subsampled_high_bit_pn_4_4_4;
#[cfg(feature = "v210")]
mod v210;
#[cfg(feature = "yuv-444-packed")]
mod v30x;
#[cfg(feature = "yuv-444-packed")]
mod v410;
#[cfg(feature = "yuv-444-packed")]
mod vuya;
#[cfg(feature = "yuv-444-packed")]
mod xv36;
#[cfg(all(feature = "xyz", any(feature = "std", feature = "alloc")))]
pub(crate) mod xyz12;
#[cfg(feature = "y2xx")]
mod y216;
#[cfg(feature = "y2xx")]
mod y2xx;
#[cfg(any(
feature = "gray",
feature = "yuv-planar",
feature = "yuv-semi-planar",
feature = "yuva",
))]
mod y_plane_to_luma_u16;
#[cfg(feature = "yuv-planar")]
mod yuv_planar_16bit;
#[cfg(feature = "yuv-planar")]
mod yuv_planar_8bit;
#[cfg(feature = "yuv-planar")]
mod yuv_planar_high_bit;
#[cfg(any(feature = "gbr", feature = "yuv-444-packed", feature = "yuva"))]
pub(crate) use alpha_extract::*;
#[cfg(feature = "yuv-444-packed")]
pub(crate) use ayuv64::*;
#[cfg(feature = "gray")]
pub(crate) use gray::*;
pub(crate) use hsv::*;
#[cfg(feature = "mono")]
pub(crate) use mono1bit::*;
#[cfg(feature = "rgb")]
pub(crate) use packed_rgb::*;
#[cfg(feature = "rgb")]
#[allow(unused_imports)]
pub(crate) use packed_rgb_16bit::*;
#[cfg(feature = "rgb-float")]
pub(crate) use packed_rgb_float::*;
#[cfg(feature = "yuv-packed")]
pub(crate) use packed_yuv_4_1_1::*;
#[cfg(feature = "yuv-packed")]
pub(crate) use packed_yuv_8bit::*;
#[cfg(feature = "gbr")]
pub(crate) use planar_gbr::*;
#[cfg(feature = "gbr")]
pub(crate) use planar_gbr_float::*;
#[cfg(feature = "gbr")]
pub(crate) use planar_gbr_high_bit::*;
#[cfg(feature = "yuv-semi-planar")]
pub(crate) use semi_planar_8bit::*;
#[cfg(all(feature = "yuv-planar", feature = "yuv-semi-planar"))]
pub(crate) use subsampled_high_bit_pn_4_2_0::*;
#[cfg(feature = "yuv-semi-planar")]
pub(crate) use subsampled_high_bit_pn_4_4_4::*;
#[cfg(feature = "yuv-444-packed")]
pub(crate) use v30x::*;
#[cfg(feature = "v210")]
pub(crate) use v210::*;
#[cfg(feature = "yuv-444-packed")]
pub(crate) use v410::*;
#[cfg(feature = "yuv-444-packed")]
pub(crate) use vuya::*;
#[cfg(feature = "yuv-444-packed")]
pub(crate) use xv36::*;
#[cfg(any(
feature = "gray",
feature = "yuv-planar",
feature = "yuv-semi-planar",
feature = "yuva",
))]
pub(crate) use y_plane_to_luma_u16::*;
#[cfg(feature = "y2xx")]
pub(crate) use y2xx::*;
#[cfg(feature = "y2xx")]
pub(crate) use y216::*;
#[cfg(feature = "yuv-planar")]
pub(crate) use yuv_planar_8bit::*;
#[cfg(feature = "yuv-planar")]
pub(crate) use yuv_planar_16bit::*;
#[cfg(feature = "yuv-planar")]
pub(crate) use yuv_planar_high_bit::*;
/// Clamps every 16-bit lane of `v` into the range bounded below by `zero_v` and above by
/// `max_v`.
///
/// Both comparisons are **signed** (`i16x8_max` followed by `i16x8_min`), so a lane with its
/// top bit set is treated as negative and is raised to the matching `zero_v` lane. The upper
/// bound is applied last, so it wins if the bounds are ever inverted.
///
/// NOTE(review): `zero_v` is presumably `splat(0)` and `max_v` a splat of the native-depth
/// maximum (which must fit in `i16` for the signed compare to be meaningful) — confirm at the
/// call sites.
#[inline(always)]
pub(super) fn clamp_u16_max_wasm(v: v128, zero_v: v128, max_v: v128) -> v128 {
  let floored = i16x8_max(v, zero_v);
  i16x8_min(floored, max_v)
}
/// Interleaves eight 16-bit R, G and B lanes into packed `R G B` order and writes the
/// resulting 24 `u16` values (48 bytes) starting at `ptr`.
///
/// Every output vector is assembled from three byte swizzles, one per channel. A mask index
/// of `-1` is out of range for `u8x16_swizzle` and therefore produces a zero byte, so the
/// three partial vectors occupy disjoint bytes and can be merged with a bitwise OR.
///
/// # Safety
///
/// `ptr` must be valid for writes of 24 consecutive `u16` elements.
#[inline(always)]
pub(super) unsafe fn write_rgb_u16_8(r: v128, g: v128, b: v128, ptr: *mut u16) {
  // Builds one 16-byte output vector from per-channel byte-selection masks.
  let weave = |r_sel: v128, g_sel: v128, b_sel: v128| -> v128 {
    let rg = v128_or(u8x16_swizzle(r, r_sel), u8x16_swizzle(g, g_sel));
    v128_or(rg, u8x16_swizzle(b, b_sel))
  };

  // Output elements 0..8: R0 G0 B0 R1 G1 B1 R2 G2
  let head = weave(
    i8x16(0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1),
    i8x16(-1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5),
    i8x16(-1, -1, -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1),
  );
  // Output elements 8..16: B2 R3 G3 B3 R4 G4 B4 R5
  let middle = weave(
    i8x16(-1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11),
    i8x16(-1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1),
    i8x16(4, 5, -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1),
  );
  // Output elements 16..24: G5 B5 R6 G6 B6 R7 G7 B7
  let tail = weave(
    i8x16(-1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1),
    i8x16(10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1),
    i8x16(-1, -1, 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15),
  );

  // SAFETY: the caller guarantees `ptr` is writable for 24 `u16`s; the three 16-byte stores
  // cover exactly elements 0..8, 8..16 and 16..24.
  unsafe {
    v128_store(ptr.cast(), head);
    v128_store(ptr.add(8).cast(), middle);
    v128_store(ptr.add(16).cast(), tail);
  }
}
/// Interleaves eight 16-bit R, G, B and A lanes into packed `R G B A` order and writes the
/// resulting 32 `u16` values (64 bytes) starting at `ptr`.
///
/// # Safety
///
/// `ptr` must be valid for writes of 32 consecutive `u16` elements.
#[inline(always)]
pub(super) unsafe fn write_rgba_u16_8(r: v128, g: v128, b: v128, a: v128, ptr: *mut u16) {
  // Stage 1: pair channels lane by lane -> `R0 G0 R1 G1 …` and `B0 A0 B1 A1 …`.
  let rg_03 = i16x8_shuffle::<0, 8, 1, 9, 2, 10, 3, 11>(r, g);
  let ba_03 = i16x8_shuffle::<0, 8, 1, 9, 2, 10, 3, 11>(b, a);
  let rg_47 = i16x8_shuffle::<4, 12, 5, 13, 6, 14, 7, 15>(r, g);
  let ba_47 = i16x8_shuffle::<4, 12, 5, 13, 6, 14, 7, 15>(b, a);

  // Stage 2: each 32-bit lane now holds one `R G` or `B A` pair, so interleaving the 32-bit
  // lanes of the two vectors yields complete `R G B A` pixels, two per output vector.
  let px_01 = i32x4_shuffle::<0, 4, 1, 5>(rg_03, ba_03);
  let px_23 = i32x4_shuffle::<2, 6, 3, 7>(rg_03, ba_03);
  let px_45 = i32x4_shuffle::<0, 4, 1, 5>(rg_47, ba_47);
  let px_67 = i32x4_shuffle::<2, 6, 3, 7>(rg_47, ba_47);

  // SAFETY: the caller guarantees `ptr` is writable for 32 `u16`s; the four 16-byte stores
  // cover exactly elements 0..8, 8..16, 16..24 and 24..32.
  unsafe {
    v128_store(ptr.cast(), px_01);
    v128_store(ptr.add(8).cast(), px_23);
    v128_store(ptr.add(16).cast(), px_45);
    v128_store(ptr.add(24).cast(), px_67);
  }
}
/// Loads 16 interleaved `u16` values from `ptr` and splits them into two vectors: the
/// even-indexed elements (returned first, the `U` samples) and the odd-indexed elements
/// (returned second, the `V` samples), each in source order.
///
/// # Safety
///
/// `ptr` must be valid for reads of 16 consecutive `u16` elements.
#[inline(always)]
pub(super) unsafe fn deinterleave_uv_u16_wasm(ptr: *const u16) -> (v128, v128) {
  // SAFETY: the caller guarantees 16 readable `u16`s; the two 16-byte loads cover elements
  // 0..8 and 8..16.
  let (first, second) = unsafe { (v128_load(ptr.cast()), v128_load(ptr.add(8).cast())) };

  // Within each 8-element chunk, move the four even elements to the low 8 bytes and the
  // four odd elements to the high 8 bytes.
  let evens_then_odds = i8x16(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
  let sorted_a = u8x16_swizzle(first, evens_then_odds);
  let sorted_b = u8x16_swizzle(second, evens_then_odds);

  // Gather the matching 64-bit halves of both chunks.
  let u_vec = i64x2_shuffle::<0, 2>(sorted_a, sorted_b);
  let v_vec = i64x2_shuffle::<1, 3>(sorted_a, sorted_b);
  (u_vec, v_vec)
}
/// Arithmetic (sign-extending) right shift of every `i32` lane by 15 bits, i.e. the final
/// step of a Q15 fixed-point multiply. Rounding, if wanted, must be added by the caller
/// before the shift.
#[inline(always)]
pub(super) fn q15_shift(v: v128) -> v128 {
  const Q15_FRACTION_BITS: u32 = 15;
  i32x4_shr(v, Q15_FRACTION_BITS)
}
/// Computes `(cu * u_d + cv * v_d + rnd) >> 15` on `i32` lanes for a low and a high group of
/// four lanes, then narrows the eight results to `i16` with signed saturation.
///
/// * `cu`, `cv` — per-lane `i32` coefficients applied to the U and V terms.
/// * `u_d_lo` / `v_d_lo` — operands producing output lanes 0..4.
/// * `u_d_hi` / `v_d_hi` — operands producing output lanes 4..8.
/// * `rnd` — per-lane value added before the 15-bit arithmetic right shift.
///
/// The 32-bit multiplies and adds wrap on overflow; only the final narrowing saturates.
#[inline(always)]
pub(super) fn chroma_i16x8(
  cu: v128,
  cv: v128,
  u_d_lo: v128,
  v_d_lo: v128,
  u_d_hi: v128,
  v_d_hi: v128,
  rnd: v128,
) -> v128 {
  // One group of four lanes: weighted sum, rounding term, Q15 shift.
  let mix = |u_d: v128, v_d: v128| -> v128 {
    let weighted = i32x4_add(i32x4_mul(cu, u_d), i32x4_mul(cv, v_d));
    i32x4_shr(i32x4_add(weighted, rnd), 15)
  };
  i16x8_narrow_i32x4(mix(u_d_lo, v_d_lo), mix(u_d_hi, v_d_hi))
}
/// Scales eight `i16` luma lanes: subtracts `y_off_v` (16-bit lanes, wrapping), widens to
/// `i32`, computes `(y * y_scale_v + rnd) >> 15` with an arithmetic shift, and narrows back to
/// `i16` with signed saturation.
///
/// `y_scale_v` and `rnd` are `i32x4` vectors and are applied identically to the low and the
/// high four lanes.
#[inline(always)]
pub(super) fn scale_y(y_i16: v128, y_off_v: v128, y_scale_v: v128, rnd: v128) -> v128 {
  let centered = i16x8_sub(y_i16, y_off_v);

  // Q15 multiply with rounding on one widened half.
  let scale_half = |wide: v128| -> v128 {
    let product = i32x4_mul(wide, y_scale_v);
    i32x4_shr(i32x4_add(product, rnd), 15)
  };

  let lower = scale_half(i32x4_extend_low_i16x8(centered));
  let upper = scale_half(i32x4_extend_high_i16x8(centered));
  i16x8_narrow_i32x4(lower, upper)
}
/// Zero-extends the low eight `u8` lanes of `v` (bytes 0..8) into eight 16-bit lanes.
///
/// Every resulting lane lies in `0..=255`, so it reads as the same value whether the vector
/// is subsequently treated as `i16x8` or `u16x8`.
#[inline(always)]
pub(super) fn u8_low_to_i16x8(v: v128) -> v128 {
  // The dedicated unsigned widening op gives the same lanes as interleaving the low bytes
  // with a zero vector, without materialising that zero vector or a generic byte shuffle.
  u16x8_extend_low_u8x16(v)
}
/// Zero-extends the high eight `u8` lanes of `v` (bytes 8..16) into eight 16-bit lanes.
///
/// Every resulting lane lies in `0..=255`, so it reads as the same value whether the vector
/// is subsequently treated as `i16x8` or `u16x8`.
#[inline(always)]
pub(super) fn u8_high_to_i16x8(v: v128) -> v128 {
  // The dedicated unsigned widening op gives the same lanes as interleaving the high bytes
  // with a zero vector, without materialising that zero vector or a generic byte shuffle.
  u16x8_extend_high_u8x16(v)
}
/// Duplicates each of the four low 16-bit lanes of `chroma` into an adjacent pair:
/// `[c0, c0, c1, c1, c2, c2, c3, c3]`.
#[inline(always)]
pub(super) fn dup_lo(chroma: v128) -> v128 {
  i16x8_shuffle::<0, 0, 1, 1, 2, 2, 3, 3>(chroma, chroma)
}
/// Duplicates each of the four high 16-bit lanes of `chroma` into an adjacent pair:
/// `[c4, c4, c5, c5, c6, c6, c7, c7]`.
#[inline(always)]
pub(super) fn dup_hi(chroma: v128) -> v128 {
  i16x8_shuffle::<4, 4, 5, 5, 6, 6, 7, 7>(chroma, chroma)
}
/// Interleaves sixteen 8-bit R, G and B lanes into packed `R G B` order and writes the
/// resulting 48 bytes starting at `ptr`.
///
/// Every output vector is assembled from three byte swizzles, one per channel. A mask index
/// of `-1` is out of range for `u8x16_swizzle` and therefore produces a zero byte, so the
/// three partial vectors occupy disjoint bytes and can be merged with a bitwise OR.
///
/// # Safety
///
/// `ptr` must be valid for writes of 48 consecutive bytes.
#[inline(always)]
pub(super) unsafe fn write_rgb_16(r: v128, g: v128, b: v128, ptr: *mut u8) {
  // Builds one 16-byte output vector from per-channel byte-selection masks.
  let weave = |r_sel: v128, g_sel: v128, b_sel: v128| -> v128 {
    let rg = v128_or(u8x16_swizzle(r, r_sel), u8x16_swizzle(g, g_sel));
    v128_or(rg, u8x16_swizzle(b, b_sel))
  };

  // Output bytes 0..16: R0 G0 B0 … R4 G4 B4 R5
  let head = weave(
    i8x16(0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5),
    i8x16(-1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1),
    i8x16(-1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1),
  );
  // Output bytes 16..32: G5 B5 R6 G6 B6 … R10 G10
  let middle = weave(
    i8x16(-1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1),
    i8x16(5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10),
    i8x16(-1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1),
  );
  // Output bytes 32..48: B10 R11 G11 B11 … R15 G15 B15
  let tail = weave(
    i8x16(-1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1),
    i8x16(-1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1),
    i8x16(10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15),
  );

  // SAFETY: the caller guarantees `ptr` is writable for 48 bytes; the three 16-byte stores
  // cover exactly bytes 0..16, 16..32 and 32..48.
  unsafe {
    v128_store(ptr.cast(), head);
    v128_store(ptr.add(16).cast(), middle);
    v128_store(ptr.add(32).cast(), tail);
  }
}
/// Interleaves sixteen 8-bit R, G, B and A lanes into packed `R G B A` order and writes the
/// resulting 64 bytes starting at `ptr`.
///
/// Each output vector holds four pixels and is assembled from four byte swizzles, one per
/// channel. A mask index of `-1` is out of range for `u8x16_swizzle` and therefore produces a
/// zero byte, so the partial vectors occupy disjoint bytes and can be merged with bitwise OR.
///
/// # Safety
///
/// `ptr` must be valid for writes of 64 consecutive bytes.
#[inline(always)]
pub(super) unsafe fn write_rgba_16(r: v128, g: v128, b: v128, a: v128, ptr: *mut u8) {
  // Builds one 16-byte output vector (four pixels) from per-channel byte-selection masks.
  let pack = |r_sel: v128, g_sel: v128, b_sel: v128, a_sel: v128| -> v128 {
    let rg = v128_or(u8x16_swizzle(r, r_sel), u8x16_swizzle(g, g_sel));
    let ba = v128_or(u8x16_swizzle(b, b_sel), u8x16_swizzle(a, a_sel));
    v128_or(rg, ba)
  };

  // Pixels 0..4
  let px_00_03 = pack(
    i8x16(0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1),
    i8x16(-1, 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, -1, -1),
    i8x16(-1, -1, 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, -1),
    i8x16(-1, -1, -1, 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3),
  );
  // Pixels 4..8
  let px_04_07 = pack(
    i8x16(4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1, 7, -1, -1, -1),
    i8x16(-1, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1, 7, -1, -1),
    i8x16(-1, -1, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1, 7, -1),
    i8x16(-1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1, 7),
  );
  // Pixels 8..12
  let px_08_11 = pack(
    i8x16(8, -1, -1, -1, 9, -1, -1, -1, 10, -1, -1, -1, 11, -1, -1, -1),
    i8x16(-1, 8, -1, -1, -1, 9, -1, -1, -1, 10, -1, -1, -1, 11, -1, -1),
    i8x16(-1, -1, 8, -1, -1, -1, 9, -1, -1, -1, 10, -1, -1, -1, 11, -1),
    i8x16(-1, -1, -1, 8, -1, -1, -1, 9, -1, -1, -1, 10, -1, -1, -1, 11),
  );
  // Pixels 12..16
  let px_12_15 = pack(
    i8x16(12, -1, -1, -1, 13, -1, -1, -1, 14, -1, -1, -1, 15, -1, -1, -1),
    i8x16(-1, 12, -1, -1, -1, 13, -1, -1, -1, 14, -1, -1, -1, 15, -1, -1),
    i8x16(-1, -1, 12, -1, -1, -1, 13, -1, -1, -1, 14, -1, -1, -1, 15, -1),
    i8x16(-1, -1, -1, 12, -1, -1, -1, 13, -1, -1, -1, 14, -1, -1, -1, 15),
  );

  // SAFETY: the caller guarantees `ptr` is writable for 64 bytes; the four 16-byte stores
  // cover exactly bytes 0..16, 16..32, 32..48 and 48..64.
  unsafe {
    v128_store(ptr.cast(), px_00_03);
    v128_store(ptr.add(16).cast(), px_04_07);
    v128_store(ptr.add(32).cast(), px_08_11);
    v128_store(ptr.add(48).cast(), px_12_15);
  }
}
/// Scales eight unsigned 16-bit luma lanes: zero-extends them to 32 bits, subtracts
/// `y_off32_v`, computes `(y * y_scale_v + rnd_v) >> 15` (shift via [`q15_shift`]), and
/// narrows the eight results to `i16` with signed saturation.
///
/// `y_off32_v`, `y_scale_v` and `rnd_v` are `i32x4` vectors and are applied identically to
/// the low and the high four lanes. The 32-bit arithmetic wraps on overflow.
#[inline(always)]
pub(super) fn scale_y_u16_wasm(y_u16: v128, y_off32_v: v128, y_scale_v: v128, rnd_v: v128) -> v128 {
  // Offset removal plus Q15 multiply with rounding on one widened half.
  let scale_half = |y_wide: v128| -> v128 {
    let centered = i32x4_sub(y_wide, y_off32_v);
    q15_shift(i32x4_add(i32x4_mul(centered, y_scale_v), rnd_v))
  };

  let scaled_lo = scale_half(u32x4_extend_low_u16x8(y_u16));
  let scaled_hi = scale_half(u32x4_extend_high_u16x8(y_u16));
  i16x8_narrow_i32x4(scaled_lo, scaled_hi)
}
/// Computes `(cu_i64 * u_d_i64 + cv_i64 * v_d_i64 + rnd_i64) >> 15` on two `i64` lanes, using
/// an arithmetic (sign-extending) right shift.
///
/// The result stays in 64-bit lanes; no narrowing is performed here.
#[inline(always)]
pub(super) fn chroma_i64x2_wasm(
  cu_i64: v128,
  cv_i64: v128,
  u_d_i64: v128,
  v_d_i64: v128,
  rnd_i64: v128,
) -> v128 {
  let u_term = i64x2_mul(cu_i64, u_d_i64);
  let v_term = i64x2_mul(cv_i64, v_d_i64);
  let rounded = i64x2_add(i64x2_add(u_term, v_term), rnd_i64);
  i64x2_shr(rounded, 15)
}
/// Packs two `i64x2` vectors into one `i32x4` by keeping only the low 32 bits of each 64-bit
/// lane: `[lo[0], lo[1], hi[0], hi[1]]`.
///
/// This is a plain truncation — the upper 32 bits of every input lane are discarded without
/// saturation.
#[inline(always)]
pub(super) fn combine_i64x2_pair_to_i32x4(lo: v128, hi: v128) -> v128 {
  // Viewed as 32-bit lanes, the low word of each 64-bit lane sits at the even indices:
  // 0 and 2 in `lo`, 4 and 6 in `hi`.
  i32x4_shuffle::<0, 2, 4, 6>(lo, hi)
}
/// Duplicates each 32-bit lane of `chroma` into an adjacent pair and returns the result as
/// two vectors: `([c0, c0, c1, c1], [c2, c2, c3, c3])`.
#[inline(always)]
pub(super) fn chroma_dup_i32x4_u16(chroma: v128) -> (v128, v128) {
  (
    i32x4_shuffle::<0, 0, 1, 1>(chroma, chroma),
    i32x4_shuffle::<2, 2, 3, 3>(chroma, chroma),
  )
}
/// Scales four `i32` lanes using 64-bit intermediates: each lane of `y_minus_off` is
/// sign-extended to `i64`, multiplied by `y_scale_i64`, offset by `rnd_i64`, and shifted right
/// arithmetically by 15 bits. The four results are then packed back into an `i32x4` via
/// [`combine_i64x2_pair_to_i32x4`], which keeps the low 32 bits of each 64-bit result.
///
/// `y_scale_i64` and `rnd_i64` are `i64x2` vectors and are applied identically to the low and
/// the high pair of lanes.
#[inline(always)]
pub(super) fn scale_y_i32x4_i64_wasm(y_minus_off: v128, y_scale_i64: v128, rnd_i64: v128) -> v128 {
  // Q15 multiply with rounding on one widened pair of lanes.
  let scale_pair = |y_wide: v128| -> v128 {
    let product = i64x2_mul(y_scale_i64, y_wide);
    i64x2_shr(i64x2_add(product, rnd_i64), 15)
  };

  let lanes_01 = scale_pair(i64x2_extend_low_i32x4(y_minus_off));
  let lanes_23 = scale_pair(i64x2_extend_high_i32x4(y_minus_off));
  combine_i64x2_pair_to_i32x4(lanes_01, lanes_23)
}
#[cfg(all(test, feature = "std"))]
mod tests;