use core::arch::x86_64::*;
#[allow(unused_imports)]
#[cfg(feature = "rgb")]
pub(super) use crate::row::arch::x86_common::{
abgr_to_rgb_16_pixels, abgr_to_rgba_4_pixels, argb_to_rgb_16_pixels, argb_to_rgba_4_pixels,
bgra_to_rgb_16_pixels, bgrx_to_rgba_4_pixels, drop_alpha_16_pixels, rgbx_to_rgba_4_pixels,
swap_rb_16_pixels, swap_rb_alpha_4_pixels, x2bgr10_to_rgb_16_pixels, x2bgr10_to_rgb_u16_8_pixels,
x2bgr10_to_rgba_16_pixels, x2rgb10_to_rgb_16_pixels, x2rgb10_to_rgb_u16_8_pixels,
x2rgb10_to_rgba_16_pixels, xbgr_to_rgba_4_pixels, xrgb_to_rgba_4_pixels,
};
#[allow(unused_imports)]
#[cfg(any(
feature = "yuv-444-packed",
feature = "rgb-legacy",
feature = "mono",
feature = "rgb",
feature = "yuv-packed",
feature = "gbr",
feature = "yuv-semi-planar",
feature = "yuv-planar",
feature = "y2xx",
feature = "xyz",
))]
pub(super) use crate::row::arch::x86_common::{write_rgb_16, write_rgba_16};
#[allow(unused_imports)]
#[cfg(any(
feature = "yuv-444-packed",
feature = "rgb-legacy",
feature = "mono",
feature = "rgb",
feature = "gbr",
feature = "yuv-planar",
feature = "yuv-semi-planar",
feature = "y2xx",
))]
pub(super) use crate::row::arch::x86_common::{write_rgb_u16_8, write_rgba_u16_8};
#[allow(unused_imports)]
pub(super) use crate::{
ColorMatrix,
row::{
arch::x86_common::{deinterleave_rgb_16, rgb_to_hsv_16_pixels, rgb_to_luma_16_pixels},
scalar,
},
};
#[cfg(any(feature = "gbr", feature = "yuv-444-packed", feature = "yuva"))]
mod alpha_extract;
#[cfg(feature = "yuv-444-packed")]
mod ayuv64;
pub(crate) mod endian;
#[cfg(feature = "gray")]
mod gray;
mod hsv;
#[cfg(feature = "rgb-legacy")]
pub(crate) mod legacy_rgb;
#[cfg(feature = "mono")]
pub(crate) mod mono1bit;
#[cfg(feature = "rgb")]
mod packed_rgb;
#[cfg(feature = "rgb")]
mod packed_rgb_16bit;
#[cfg(feature = "rgb-float")]
mod packed_rgb_float;
#[cfg(feature = "yuv-packed")]
mod packed_yuv_4_1_1;
#[cfg(feature = "yuv-packed")]
mod packed_yuv_8bit;
#[cfg(feature = "gbr")]
mod planar_gbr;
#[cfg(feature = "gbr")]
mod planar_gbr_float;
#[cfg(feature = "gbr")]
mod planar_gbr_high_bit;
#[cfg(feature = "yuv-semi-planar")]
mod semi_planar_8bit;
#[cfg(all(feature = "yuv-planar", feature = "yuv-semi-planar"))]
mod subsampled_high_bit_pn_4_2_0;
#[cfg(feature = "yuv-semi-planar")]
mod subsampled_high_bit_pn_4_4_4;
#[cfg(feature = "v210")]
mod v210;
#[cfg(feature = "yuv-444-packed")]
mod v30x;
#[cfg(feature = "yuv-444-packed")]
mod v410;
#[cfg(feature = "yuv-444-packed")]
mod vuya;
#[cfg(feature = "yuv-444-packed")]
mod xv36;
#[cfg(all(feature = "xyz", any(feature = "std", feature = "alloc")))]
pub(crate) mod xyz12;
#[cfg(feature = "y2xx")]
mod y216;
#[cfg(feature = "y2xx")]
mod y2xx;
#[cfg(any(
feature = "gray",
feature = "yuv-planar",
feature = "yuv-semi-planar",
feature = "yuva",
))]
mod y_plane_to_luma_u16;
#[cfg(feature = "yuv-planar")]
mod yuv_planar_16bit;
#[cfg(feature = "yuv-planar")]
mod yuv_planar_8bit;
#[cfg(feature = "yuv-planar")]
mod yuv_planar_high_bit;
#[cfg(any(feature = "gbr", feature = "yuv-444-packed", feature = "yuva"))]
pub(crate) use alpha_extract::*;
#[cfg(feature = "yuv-444-packed")]
pub(crate) use ayuv64::*;
#[cfg(feature = "gray")]
pub(crate) use gray::*;
pub(crate) use hsv::*;
#[cfg(feature = "rgb-legacy")]
#[allow(unused_imports)] pub(crate) use legacy_rgb::*;
#[cfg(feature = "mono")]
pub(crate) use mono1bit::*;
#[cfg(feature = "rgb")]
pub(crate) use packed_rgb::*;
#[cfg(feature = "rgb")]
#[allow(unused_imports)] pub(crate) use packed_rgb_16bit::*;
#[cfg(feature = "rgb-float")]
pub(crate) use packed_rgb_float::*;
#[cfg(feature = "yuv-packed")]
pub(crate) use packed_yuv_4_1_1::*;
#[cfg(feature = "yuv-packed")]
pub(crate) use packed_yuv_8bit::*;
#[cfg(feature = "gbr")]
pub(crate) use planar_gbr::*;
#[cfg(feature = "gbr")]
#[allow(unused_imports)] pub(crate) use planar_gbr_float::*;
#[cfg(feature = "gbr")]
#[allow(unused_imports)] pub(crate) use planar_gbr_high_bit::*;
#[cfg(feature = "yuv-semi-planar")]
pub(crate) use semi_planar_8bit::*;
#[cfg(all(feature = "yuv-planar", feature = "yuv-semi-planar"))]
pub(crate) use subsampled_high_bit_pn_4_2_0::*;
#[cfg(feature = "yuv-semi-planar")]
pub(crate) use subsampled_high_bit_pn_4_4_4::*;
#[cfg(feature = "yuv-444-packed")]
pub(crate) use v30x::*;
#[cfg(feature = "v210")]
pub(crate) use v210::*;
#[cfg(feature = "yuv-444-packed")]
pub(crate) use v410::*;
#[cfg(feature = "yuv-444-packed")]
pub(crate) use vuya::*;
#[cfg(feature = "yuv-444-packed")]
pub(crate) use xv36::*;
#[cfg(any(
feature = "gray",
feature = "yuv-planar",
feature = "yuv-semi-planar",
feature = "yuva",
))]
pub(crate) use y_plane_to_luma_u16::*;
#[cfg(feature = "y2xx")]
pub(crate) use y2xx::*;
#[cfg(feature = "y2xx")]
pub(crate) use y216::*;
#[cfg(feature = "yuv-planar")]
pub(crate) use yuv_planar_8bit::*;
#[cfg(feature = "yuv-planar")]
pub(crate) use yuv_planar_16bit::*;
#[cfg(feature = "yuv-planar")]
pub(crate) use yuv_planar_high_bit::*;
/// Clamps each signed 16-bit lane of `v` into the range `[zero_v, max_v]`.
///
/// Every lane is first raised to at least the matching lane of `zero_v`
/// (signed max) and then lowered to at most the matching lane of `max_v`
/// (signed min). The parameter names suggest `zero_v` is an all-zero vector
/// and `max_v` a broadcast upper limit, but the bounds are applied per lane.
///
/// NOTE(review): this is a safe `fn` that executes AVX2 instructions; it is
/// presumably only reached after AVX2 support was verified — confirm against
/// callers.
#[cfg(any(
    feature = "yuv-planar",
    feature = "yuv-semi-planar",
    feature = "v210",
    feature = "yuv-444-packed",
    feature = "y2xx",
))]
#[inline(always)]
pub(super) fn clamp_u16_max_x16(v: __m256i, zero_v: __m256i, max_v: __m256i) -> __m256i {
    // SAFETY: register-only intrinsics with no memory access; the only
    // requirement is AVX2 availability (see the review note above).
    unsafe {
        let floored = _mm256_max_epi16(v, zero_v);
        _mm256_min_epi16(floored, max_v)
    }
}
/// Loads 32 interleaved `u16` values from `ptr` and splits them by parity.
///
/// Returns `(even, odd)`: the first vector holds source elements
/// `0, 2, 4, …, 30` and the second holds elements `1, 3, 5, …, 31`, both in
/// ascending source order. Per the function name these are the U and V
/// samples of an interleaved UV row.
///
/// # Safety
///
/// * `ptr` must be valid for reads of 32 consecutive `u16` values (64 bytes).
///   No alignment is required: both loads are unaligned loads.
/// * The CPU must support AVX2.
#[cfg(feature = "yuv-semi-planar")]
#[inline(always)]
pub(super) unsafe fn deinterleave_uv_u16_avx2(ptr: *const u16) -> (__m256i, __m256i) {
    // SAFETY: the caller guarantees 64 readable bytes at `ptr` and AVX2
    // support; everything after the two loads is register-only.
    unsafe {
        // Within each 128-bit lane: even-indexed u16s go to the low 8 bytes,
        // odd-indexed u16s to the high 8 bytes.
        let parity_split = _mm256_setr_epi8(
            0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, // lane 0
            0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, // lane 1
        );

        // After the byte shuffle each register is [E0 O0 | E1 O1] in 64-bit
        // groups; the 0xD8 permute regroups it to [E0 E1 | O0 O1].
        let first_half = _mm256_permute4x64_epi64::<0xD8>(_mm256_shuffle_epi8(
            _mm256_loadu_si256(ptr.cast()),
            parity_split,
        ));
        let second_half = _mm256_permute4x64_epi64::<0xD8>(_mm256_shuffle_epi8(
            _mm256_loadu_si256(ptr.add(16).cast()),
            parity_split,
        ));

        // 0x20 joins the two low 128-bit lanes (even elements), 0x31 the two
        // high lanes (odd elements).
        (
            _mm256_permute2x128_si256::<0x20>(first_half, second_half),
            _mm256_permute2x128_si256::<0x31>(first_half, second_half),
        )
    }
}
/// Arithmetic right shift by 15 bits of every signed 32-bit lane of `v`.
///
/// Per the function name this is the step that drops the 15 fractional bits
/// of a Q15 fixed-point product. No rounding term is added here.
///
/// NOTE(review): this is a safe `fn` that executes AVX2 instructions; it is
/// presumably only reached after AVX2 support was verified — confirm against
/// callers.
#[cfg(any(
    feature = "yuv-444-packed",
    feature = "yuv-packed",
    feature = "yuv-semi-planar",
    feature = "v210",
    feature = "y2xx",
    feature = "yuv-planar",
))]
#[inline(always)]
pub(super) fn q15_shift(v: __m256i) -> __m256i {
    /// Number of low bits discarded by the shift.
    const FRACTION_BITS: i32 = 15;
    // SAFETY: register-only intrinsic; requires only AVX2 availability.
    unsafe { _mm256_srai_epi32::<{ FRACTION_BITS }>(v) }
}
/// Computes `(cu * u + cv * v + rnd) >> 15` for 16 samples and packs the
/// results into 16 signed 16-bit lanes.
///
/// * `cu`, `cv` — eight 32-bit coefficients each, reused for both halves.
/// * `u_d_lo`, `v_d_lo` — 32-bit operands for output lanes 0..8.
/// * `u_d_hi`, `v_d_hi` — 32-bit operands for output lanes 8..16.
/// * `rnd` — 32-bit value added to every sum before the shift.
///
/// Products keep only their low 32 bits, the shift is arithmetic, and the
/// final narrowing saturates to the `i16` range. Output lanes 0..8 come from
/// the `*_lo` operands and lanes 8..16 from the `*_hi` operands.
///
/// NOTE(review): this is a safe `fn` that executes AVX2 instructions; it is
/// presumably only reached after AVX2 support was verified — confirm against
/// callers.
#[cfg(any(
    feature = "yuv-444-packed",
    feature = "yuv-packed",
    feature = "yuv-semi-planar",
    feature = "v210",
    feature = "y2xx",
    feature = "yuv-planar",
))]
#[inline(always)]
pub(super) fn chroma_i16x16(
    cu: __m256i,
    cv: __m256i,
    u_d_lo: __m256i,
    v_d_lo: __m256i,
    u_d_hi: __m256i,
    v_d_hi: __m256i,
    rnd: __m256i,
) -> __m256i {
    // SAFETY: register-only intrinsics; requires only AVX2 availability.
    unsafe {
        // Low half: weighted sum, rounding term, then drop 15 bits.
        let u_term_lo = _mm256_mullo_epi32(cu, u_d_lo);
        let v_term_lo = _mm256_mullo_epi32(cv, v_d_lo);
        let sum_lo = _mm256_add_epi32(_mm256_add_epi32(u_term_lo, v_term_lo), rnd);
        let res_lo = _mm256_srai_epi32::<15>(sum_lo);

        // High half: same arithmetic on the `*_hi` operands.
        let u_term_hi = _mm256_mullo_epi32(cu, u_d_hi);
        let v_term_hi = _mm256_mullo_epi32(cv, v_d_hi);
        let sum_hi = _mm256_add_epi32(_mm256_add_epi32(u_term_hi, v_term_hi), rnd);
        let res_hi = _mm256_srai_epi32::<15>(sum_hi);

        // The pack works per 128-bit lane and interleaves the two inputs in
        // 64-bit groups; the 0xD8 permute restores sequential order.
        let packed = _mm256_packs_epi32(res_lo, res_hi);
        _mm256_permute4x64_epi64::<0xD8>(packed)
    }
}
/// Computes `((y - y_off) * y_scale + rnd) >> 15` for 16 signed 16-bit
/// samples and returns the results as 16 signed 16-bit lanes.
///
/// * `y_i16` — 16 samples as signed 16-bit lanes.
/// * `y_off_v` — 16-bit offsets, subtracted with wrapping 16-bit arithmetic.
/// * `y_scale_v` — eight 32-bit multipliers, reused for both halves.
/// * `rnd` — 32-bit value added to every product before the shift.
///
/// The difference is sign-extended to 32 bits before the multiply. Products
/// keep only their low 32 bits, the shift is arithmetic, and the final
/// narrowing saturates to the `i16` range. Lane order matches the input.
///
/// NOTE(review): this is a safe `fn` that executes AVX2 instructions; it is
/// presumably only reached after AVX2 support was verified — confirm against
/// callers.
#[cfg(any(
    feature = "yuv-444-packed",
    feature = "yuv-packed",
    feature = "yuv-semi-planar",
    feature = "v210",
    feature = "y2xx",
    feature = "yuv-planar",
))]
#[inline(always)]
pub(super) fn scale_y(
    y_i16: __m256i,
    y_off_v: __m256i,
    y_scale_v: __m256i,
    rnd: __m256i,
) -> __m256i {
    // SAFETY: register-only intrinsics; requires only AVX2 availability.
    unsafe {
        let centered = _mm256_sub_epi16(y_i16, y_off_v);

        // Sign-extend each 128-bit half to eight 32-bit lanes.
        let wide_lo = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(centered));
        let wide_hi = _mm256_cvtepi16_epi32(_mm256_extracti128_si256::<1>(centered));

        let prod_lo = _mm256_mullo_epi32(wide_lo, y_scale_v);
        let prod_hi = _mm256_mullo_epi32(wide_hi, y_scale_v);

        let out_lo = _mm256_srai_epi32::<15>(_mm256_add_epi32(prod_lo, rnd));
        let out_hi = _mm256_srai_epi32::<15>(_mm256_add_epi32(prod_hi, rnd));

        // The pack interleaves per 128-bit lane; 0xD8 restores sequential order.
        let packed = _mm256_packs_epi32(out_lo, out_hi);
        _mm256_permute4x64_epi64::<0xD8>(packed)
    }
}
/// Duplicates each of the 16 16-bit lanes of `chroma` into an adjacent pair.
///
/// Returns `(lo16, hi16)`:
/// * `lo16` = `c0 c0 c1 c1 … c7 c7`
/// * `hi16` = `c8 c8 c9 c9 … c15 c15`
///
/// NOTE(review): this is a safe `fn` that executes AVX2 instructions; it is
/// presumably only reached after AVX2 support was verified — confirm against
/// callers.
#[cfg(any(
    feature = "yuv-packed",
    feature = "yuv-semi-planar",
    feature = "v210",
    feature = "y2xx",
    feature = "yuv-planar",
))]
#[inline(always)]
pub(super) fn chroma_dup(chroma: __m256i) -> (__m256i, __m256i) {
    // SAFETY: register-only intrinsics; requires only AVX2 availability.
    unsafe {
        // The unpacks work per 128-bit lane:
        //   pairs_03_8b = [c0 c0 .. c3 c3 | c8 c8 .. c11 c11]
        //   pairs_47_cf = [c4 c4 .. c7 c7 | c12 c12 .. c15 c15]
        let pairs_03_8b = _mm256_unpacklo_epi16(chroma, chroma);
        let pairs_47_cf = _mm256_unpackhi_epi16(chroma, chroma);

        // 0x20 joins the two low 128-bit lanes, 0x31 the two high lanes.
        (
            _mm256_permute2x128_si256::<0x20>(pairs_03_8b, pairs_47_cf),
            _mm256_permute2x128_si256::<0x31>(pairs_03_8b, pairs_47_cf),
        )
    }
}
/// Narrows two vectors of 16 signed 16-bit lanes into one vector of 32
/// unsigned bytes, saturating each value to `0..=255`.
///
/// Output bytes 0..16 come from `lo` and bytes 16..32 from `hi`, in order.
///
/// NOTE(review): this is a safe `fn` that executes AVX2 instructions; it is
/// presumably only reached after AVX2 support was verified — confirm against
/// callers.
#[cfg(any(
    feature = "yuv-444-packed",
    feature = "yuv-packed",
    feature = "yuv-semi-planar",
    feature = "v210",
    feature = "y2xx",
    feature = "yuv-planar",
))]
#[inline(always)]
pub(super) fn narrow_u8x32(lo: __m256i, hi: __m256i) -> __m256i {
    // SAFETY: register-only intrinsics; requires only AVX2 availability.
    unsafe {
        // The pack works per 128-bit lane and yields
        // [lo.0 hi.0 | lo.1 hi.1] in 64-bit groups.
        let lane_interleaved = _mm256_packus_epi16(lo, hi);
        // 0xD8 swaps the middle groups to give [lo.0 lo.1 | hi.0 hi.1].
        _mm256_permute4x64_epi64::<0xD8>(lane_interleaved)
    }
}
/// Stores 32 pixels from three channel vectors `r`, `g`, `b` starting at `ptr`.
///
/// Each vector is split into its two 128-bit halves. The low halves go to
/// `write_rgb_16` at `ptr`, and the high halves to `write_rgb_16` at
/// `ptr + 48` bytes. The 48-byte stride matches 16 pixels × 3 bytes;
/// presumably `write_rgb_16` emits exactly that much — confirm against its
/// definition.
///
/// # Safety
///
/// * `ptr` must satisfy `write_rgb_16`'s requirements at both `ptr` and
///   `ptr.add(48)`; presumably that means 96 writable bytes — confirm against
///   `write_rgb_16`.
/// * The CPU must support AVX2.
#[cfg(any(
    feature = "yuv-444-packed",
    feature = "yuv-packed",
    feature = "yuv-semi-planar",
    feature = "y2xx",
    feature = "yuv-planar",
))]
#[inline(always)]
pub(super) unsafe fn write_rgb_32(r: __m256i, g: __m256i, b: __m256i, ptr: *mut u8) {
    // SAFETY: the caller upholds the pointer and CPU-feature contract above;
    // the lane extraction itself is register-only.
    unsafe {
        // First 16 pixels: low 128-bit half of every channel.
        let (r0, g0, b0) = (
            _mm256_castsi256_si128(r),
            _mm256_castsi256_si128(g),
            _mm256_castsi256_si128(b),
        );
        // Next 16 pixels: high 128-bit half of every channel.
        let (r1, g1, b1) = (
            _mm256_extracti128_si256::<1>(r),
            _mm256_extracti128_si256::<1>(g),
            _mm256_extracti128_si256::<1>(b),
        );

        write_rgb_16(r0, g0, b0, ptr);
        write_rgb_16(r1, g1, b1, ptr.add(48));
    }
}
/// Stores 32 pixels from four channel vectors `r`, `g`, `b`, `a` starting at
/// `ptr`.
///
/// Each vector is split into its two 128-bit halves. The low halves go to
/// `write_rgba_16` at `ptr`, and the high halves to `write_rgba_16` at
/// `ptr + 64` bytes. The 64-byte stride matches 16 pixels × 4 bytes;
/// presumably `write_rgba_16` emits exactly that much — confirm against its
/// definition.
///
/// # Safety
///
/// * `ptr` must satisfy `write_rgba_16`'s requirements at both `ptr` and
///   `ptr.add(64)`; presumably that means 128 writable bytes — confirm against
///   `write_rgba_16`.
/// * The CPU must support AVX2.
#[cfg(any(
    feature = "yuv-444-packed",
    feature = "yuv-packed",
    feature = "yuv-semi-planar",
    feature = "y2xx",
    feature = "yuv-planar",
))]
#[inline(always)]
pub(super) unsafe fn write_rgba_32(r: __m256i, g: __m256i, b: __m256i, a: __m256i, ptr: *mut u8) {
    // SAFETY: the caller upholds the pointer and CPU-feature contract above;
    // the lane extraction itself is register-only.
    unsafe {
        // First 16 pixels: low 128-bit half of every channel.
        let (r0, g0, b0, a0) = (
            _mm256_castsi256_si128(r),
            _mm256_castsi256_si128(g),
            _mm256_castsi256_si128(b),
            _mm256_castsi256_si128(a),
        );
        // Next 16 pixels: high 128-bit half of every channel.
        let (r1, g1, b1, a1) = (
            _mm256_extracti128_si256::<1>(r),
            _mm256_extracti128_si256::<1>(g),
            _mm256_extracti128_si256::<1>(b),
            _mm256_extracti128_si256::<1>(a),
        );

        write_rgba_16(r0, g0, b0, a0, ptr);
        write_rgba_16(r1, g1, b1, a1, ptr.add(64));
    }
}
/// Computes `((y - y_off) * y_scale + rnd) >> 15` for 16 unsigned 16-bit
/// samples and returns the results as 16 signed 16-bit lanes.
///
/// * `y_u16x16` — 16 samples as unsigned 16-bit lanes.
/// * `y_off_v` — eight 32-bit offsets, reused for both halves.
/// * `y_scale_v` — eight 32-bit multipliers, reused for both halves.
/// * `rnd_v` — 32-bit value added to every product before the shift.
///
/// Samples are zero-extended to 32 bits before the offset is subtracted.
/// Products keep only their low 32 bits, the shift is arithmetic, and the
/// final narrowing saturates to the `i16` range. Lane order matches the input.
///
/// NOTE(review): this is a safe `fn` that executes AVX2 instructions; it is
/// presumably only reached after AVX2 support was verified — confirm against
/// callers.
#[cfg(any(
    feature = "yuv-444-packed",
    feature = "yuv-planar",
    feature = "yuv-semi-planar",
    feature = "y2xx",
))]
#[inline(always)]
pub(super) fn scale_y_u16_avx2(
    y_u16x16: __m256i,
    y_off_v: __m256i,
    y_scale_v: __m256i,
    rnd_v: __m256i,
) -> __m256i {
    // SAFETY: register-only intrinsics; requires only AVX2 availability.
    unsafe {
        // Zero-extend each 128-bit half to eight 32-bit lanes.
        let wide_lo = _mm256_cvtepu16_epi32(_mm256_castsi256_si128(y_u16x16));
        let wide_hi = _mm256_cvtepu16_epi32(_mm256_extracti128_si256::<1>(y_u16x16));

        let centered_lo = _mm256_sub_epi32(wide_lo, y_off_v);
        let centered_hi = _mm256_sub_epi32(wide_hi, y_off_v);

        let prod_lo = _mm256_mullo_epi32(centered_lo, y_scale_v);
        let prod_hi = _mm256_mullo_epi32(centered_hi, y_scale_v);

        let out_lo = _mm256_srai_epi32::<15>(_mm256_add_epi32(prod_lo, rnd_v));
        let out_hi = _mm256_srai_epi32::<15>(_mm256_add_epi32(prod_hi, rnd_v));

        // The pack interleaves per 128-bit lane; 0xD8 restores sequential order.
        let packed = _mm256_packs_epi32(out_lo, out_hi);
        _mm256_permute4x64_epi64::<0xD8>(packed)
    }
}
/// Emulates an arithmetic right shift by 15 on each of the four signed 64-bit
/// lanes of `x`.
///
/// AVX2 only offers a logical 64-bit right shift, so each lane is biased by
/// `2^32` to make it non-negative, shifted logically, and then corrected by
/// subtracting `2^32 >> 15 = 2^17`. Because `2^32` is a multiple of `2^15`,
/// the result equals `floor(x / 2^15)` whenever `x + 2^32` is non-negative and
/// does not overflow.
///
/// In debug builds lane 0 (only) is asserted to satisfy `|x| < 2^32`.
///
/// NOTE(review): the assertion message refers to a worst-case table in this
/// helper's docstring; that table is not present in this view of the file.
///
/// NOTE(review): this is a safe `fn` that executes AVX2 instructions; it is
/// presumably only reached after AVX2 support was verified — confirm against
/// callers.
#[cfg(any(
    feature = "yuv-444-packed",
    feature = "yuv-planar",
    feature = "yuv-semi-planar",
    feature = "y2xx",
))]
#[inline(always)]
pub(super) fn srai64_15_x4(x: __m256i) -> __m256i {
    /// Offset that lifts every in-range lane to a non-negative value.
    const BIAS: i64 = 1i64 << 32;
    /// The same offset after the 15-bit shift (`2^17`), removed afterwards.
    const BIAS_AFTER_SHIFT: i64 = BIAS >> 15;

    // SAFETY: register-only intrinsics; requires only AVX2 availability.
    unsafe {
        #[cfg(debug_assertions)]
        {
            let lane0 = _mm256_extract_epi64::<0>(x);
            debug_assert!(
                lane0.unsigned_abs() < (1u64 << 32),
                "srai64_15_x4: lane 0 = {lane0} exceeds the 2^32 bias-trick bound \
                 (see worst-case table in helper docstring)",
            );
        }

        let non_negative = _mm256_add_epi64(x, _mm256_set1_epi64x(BIAS));
        let shifted = _mm256_srli_epi64::<15>(non_negative);
        _mm256_sub_epi64(shifted, _mm256_set1_epi64x(BIAS_AFTER_SHIFT))
    }
}
/// Computes `(cu * u_d + cv * v_d + rnd_v) >> 15` with 64-bit intermediates,
/// producing four signed 64-bit lanes.
///
/// The multiplies are signed 32×32→64 and read only the low 32 bits of each
/// 64-bit lane, i.e. the even-indexed 32-bit lanes of `cu`, `cv`, `u_d` and
/// `v_d`. `rnd_v` is added as four 64-bit lanes. The shift is delegated to
/// `srai64_15_x4`, so its input-range requirement applies to the sum.
///
/// NOTE(review): this is a safe `fn` that executes AVX2 instructions; it is
/// presumably only reached after AVX2 support was verified — confirm against
/// callers.
#[cfg(any(
    feature = "yuv-444-packed",
    feature = "yuv-planar",
    feature = "yuv-semi-planar",
    feature = "y2xx",
))]
#[inline(always)]
pub(super) fn chroma_i64x4_avx2(
    cu: __m256i,
    cv: __m256i,
    u_d: __m256i,
    v_d: __m256i,
    rnd_v: __m256i,
) -> __m256i {
    // SAFETY: register-only intrinsics; requires only AVX2 availability.
    unsafe {
        let u_term = _mm256_mul_epi32(cu, u_d);
        let v_term = _mm256_mul_epi32(cv, v_d);
        let weighted = _mm256_add_epi64(u_term, v_term);
        let rounded = _mm256_add_epi64(weighted, rnd_v);
        srai64_15_x4(rounded)
    }
}
/// Interleaves the low 32 bits of the 64-bit lanes of `even` and `odd` into
/// one vector of eight 32-bit lanes.
///
/// With `even = [e0, e1, e2, e3]` and `odd = [o0, o1, o2, o3]` as 64-bit
/// lanes, the result is `[e0, o0, e1, o1, e2, o2, e3, o3]` as 32-bit lanes,
/// each truncated to its low 32 bits. The high 32 bits of every input lane
/// are discarded.
///
/// NOTE(review): this is a safe `fn` that executes AVX2 instructions; it is
/// presumably only reached after AVX2 support was verified — confirm against
/// callers.
#[cfg(any(
    feature = "yuv-444-packed",
    feature = "yuv-planar",
    feature = "yuv-semi-planar",
    feature = "y2xx",
))]
#[inline(always)]
pub(super) fn reassemble_i64x4_to_i32x8(even: __m256i, odd: __m256i) -> __m256i {
    // SAFETY: register-only intrinsics; requires only AVX2 availability.
    unsafe {
        // Per 128-bit lane, the low 64 bits of each unpack hold one
        // (even, odd) pair of low dwords; the upper 64 bits hold the
        // discarded high dwords.
        let first_pairs = _mm256_unpacklo_epi32(even, odd);
        let second_pairs = _mm256_unpackhi_epi32(even, odd);
        // Keep only the useful low 64 bits of both, side by side.
        _mm256_unpacklo_epi64(first_pairs, second_pairs)
    }
}
/// Computes `(y * y_scale + rnd_v) >> 15` for eight signed 32-bit lanes using
/// 64-bit intermediates, returning eight 32-bit lanes in the input order.
///
/// The 32×32→64 multiply only reads even-indexed 32-bit lanes, so the work is
/// done in two passes: one on `y_minus_off` as-is (lanes 0, 2, 4, 6) and one
/// on a copy whose odd lanes were moved into the even positions (lanes 1, 3,
/// 5, 7). Both passes read the even-indexed 32-bit lanes of `y_scale_v`.
/// `rnd_v` is added as four 64-bit lanes. Each result is truncated to its low
/// 32 bits when the passes are re-interleaved.
///
/// The parameter name suggests the offset was already subtracted by the
/// caller; nothing is subtracted here.
///
/// NOTE(review): this is a safe `fn` that executes AVX2 instructions; it is
/// presumably only reached after AVX2 support was verified — confirm against
/// callers.
#[cfg(any(
    feature = "yuv-444-packed",
    feature = "yuv-planar",
    feature = "yuv-semi-planar",
    feature = "y2xx",
))]
#[inline(always)]
pub(super) fn scale_y_i32x8_i64(
    y_minus_off: __m256i,
    y_scale_v: __m256i,
    rnd_v: __m256i,
) -> __m256i {
    // SAFETY: register-only intrinsics; requires only AVX2 availability.
    unsafe {
        // Pass 1: even-indexed samples.
        let even_prod = _mm256_mul_epi32(y_minus_off, y_scale_v);
        let even = srai64_15_x4(_mm256_add_epi64(even_prod, rnd_v));

        // Pass 2: 0xF5 selects dwords [1, 1, 3, 3] per 128-bit lane, moving
        // the odd-indexed samples into the positions the multiply reads.
        let odd_in_even_slots = _mm256_shuffle_epi32::<0xF5>(y_minus_off);
        let odd_prod = _mm256_mul_epi32(odd_in_even_slots, y_scale_v);
        let odd = srai64_15_x4(_mm256_add_epi64(odd_prod, rnd_v));

        reassemble_i64x4_to_i32x8(even, odd)
    }
}
/// Duplicates each of the eight 32-bit lanes of `chroma` into an adjacent
/// pair.
///
/// Returns `(lo, hi)`:
/// * `lo` = `c0 c0 c1 c1 c2 c2 c3 c3`
/// * `hi` = `c4 c4 c5 c5 c6 c6 c7 c7`
///
/// NOTE(review): this is a safe `fn` that executes AVX2 instructions; it is
/// presumably only reached after AVX2 support was verified — confirm against
/// callers.
#[cfg(any(feature = "yuv-planar", feature = "y2xx"))]
#[inline(always)]
pub(super) fn chroma_dup_i32(chroma: __m256i) -> (__m256i, __m256i) {
    // SAFETY: register-only intrinsics; requires only AVX2 availability.
    unsafe {
        // The unpacks work per 128-bit lane:
        //   pairs_01_45 = [c0 c0 c1 c1 | c4 c4 c5 c5]
        //   pairs_23_67 = [c2 c2 c3 c3 | c6 c6 c7 c7]
        let pairs_01_45 = _mm256_unpacklo_epi32(chroma, chroma);
        let pairs_23_67 = _mm256_unpackhi_epi32(chroma, chroma);

        // 0x20 joins the two low 128-bit lanes, 0x31 the two high lanes.
        (
            _mm256_permute2x128_si256::<0x20>(pairs_01_45, pairs_23_67),
            _mm256_permute2x128_si256::<0x31>(pairs_01_45, pairs_23_67),
        )
    }
}
#[cfg(all(test, feature = "std"))]
mod tests;