use crate::sse::_shuffle;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
/// Splits 16 interleaved 4-channel byte pixels into four planar vectors.
///
/// `rgba0`..`rgba3` are treated as four consecutive 16-byte chunks, so byte
/// `4 * p + c` of the concatenated 64-byte input is channel `c` of pixel `p`.
/// The returned tuple holds channel 0, 1, 2 and 3 (in that order), each with
/// its 16 samples arranged in pixel order.
#[inline(always)]
pub(crate) fn _mm_deinterleave_rgba(
    rgba0: __m128i,
    rgba1: __m128i,
    rgba2: __m128i,
    rgba3: __m128i,
) -> (__m128i, __m128i, __m128i, __m128i) {
    // SAFETY: every intrinsic below works on its register arguments only (no
    // pointer is dereferenced). `_mm_shuffle_epi8` needs SSSE3 and nothing here
    // checks for it; callers are presumably expected to have verified CPU
    // support — TODO confirm at the call sites.
    unsafe {
        // Stage 1: zip bytes of chunk pairs (0,1) and (2,3).
        let bytes_01_lo = _mm_unpacklo_epi8(rgba0, rgba1);
        let bytes_23_lo = _mm_unpacklo_epi8(rgba2, rgba3);
        let bytes_01_hi = _mm_unpackhi_epi8(rgba0, rgba1);
        let bytes_23_hi = _mm_unpackhi_epi8(rgba2, rgba3);

        // Stage 2: zip 16-bit pairs; each 32-bit lane now holds four samples
        // of a single channel (pixels p, p + 4, p + 8, p + 12).
        let quad_px0 = _mm_unpacklo_epi16(bytes_01_lo, bytes_23_lo);
        let quad_px1 = _mm_unpackhi_epi16(bytes_01_lo, bytes_23_lo);
        let quad_px2 = _mm_unpacklo_epi16(bytes_01_hi, bytes_23_hi);
        let quad_px3 = _mm_unpackhi_epi16(bytes_01_hi, bytes_23_hi);

        // Stage 3: zip 32-bit lanes so that channels 0/1 and channels 2/3 end
        // up in separate registers, split by even and odd starting pixel.
        let ch01_even = _mm_unpacklo_epi32(quad_px0, quad_px2);
        let ch01_odd = _mm_unpacklo_epi32(quad_px1, quad_px3);
        let ch23_even = _mm_unpackhi_epi32(quad_px0, quad_px2);
        let ch23_odd = _mm_unpackhi_epi32(quad_px1, quad_px3);

        // Byte-wise 4x4 transpose: turns the stride-4 pixel order produced by
        // the unpack ladder back into ascending pixel order.
        #[rustfmt::skip]
        let to_pixel_order = _mm_setr_epi8(
            0, 4, 8, 12,
            1, 5, 9, 13,
            2, 6, 10, 14,
            3, 7, 11, 15,
        );

        let ch0 = _mm_unpacklo_epi32(ch01_even, ch01_odd);
        let ch1 = _mm_unpackhi_epi32(ch01_even, ch01_odd);
        let ch2 = _mm_unpacklo_epi32(ch23_even, ch23_odd);
        let ch3 = _mm_unpackhi_epi32(ch23_even, ch23_odd);

        (
            _mm_shuffle_epi8(ch0, to_pixel_order),
            _mm_shuffle_epi8(ch1, to_pixel_order),
            _mm_shuffle_epi8(ch2, to_pixel_order),
            _mm_shuffle_epi8(ch3, to_pixel_order),
        )
    }
}
/// Splits 16 interleaved 3-channel byte pixels into three planar vectors.
///
/// `s0`, `s1`, `s2` are treated as three consecutive 16-byte chunks, so byte
/// `3 * p + c` of the concatenated 48-byte input is channel `c` of pixel `p`.
/// The returned tuple holds channel 0, 1 and 2 (in that order), each with its
/// 16 samples arranged in pixel order.
#[inline(always)]
pub(crate) fn _mm_deinterleave_rgb(
    s0: __m128i,
    s1: __m128i,
    s2: __m128i,
) -> (__m128i, __m128i, __m128i) {
    // SAFETY: every intrinsic below works on its register arguments only (no
    // pointer is dereferenced). `_mm_blendv_epi8` needs SSE4.1 and
    // `_mm_shuffle_epi8` needs SSSE3; nothing here checks for them, so callers
    // are presumably expected to have verified CPU support — TODO confirm.
    unsafe {
        // Blend masks: a set high bit selects the second blend operand.
        // `take_at_2_mod_3` fires on lanes 2, 5, 8, 11, 14 and
        // `take_at_1_mod_3` on lanes 1, 4, 7, 10, 13.
        let take_at_2_mod_3 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
        let take_at_1_mod_3 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);

        // Per-channel permutations that restore ascending pixel order after
        // the blends have gathered one channel's samples into one register.
        let order_ch0 = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13);
        let order_ch1 = _mm_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14);
        let order_ch2 = _mm_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15);

        // Gather: each result collects all 16 samples of one channel, still
        // sitting at the lane positions they had in their source chunk.
        let ch0_scattered = {
            let partial = _mm_blendv_epi8(s0, s1, take_at_2_mod_3);
            _mm_blendv_epi8(partial, s2, take_at_1_mod_3)
        };
        let ch1_scattered = {
            let partial = _mm_blendv_epi8(s1, s2, take_at_2_mod_3);
            _mm_blendv_epi8(partial, s0, take_at_1_mod_3)
        };
        let ch2_scattered = {
            let partial = _mm_blendv_epi8(s2, s0, take_at_2_mod_3);
            _mm_blendv_epi8(partial, s1, take_at_1_mod_3)
        };

        (
            _mm_shuffle_epi8(ch0_scattered, order_ch0),
            _mm_shuffle_epi8(ch1_scattered, order_ch1),
            _mm_shuffle_epi8(ch2_scattered, order_ch2),
        )
    }
}
/// Interleaves three planar byte vectors into 48 bytes of packed triples.
///
/// In the concatenated output, byte `3 * i` is lane `i` of `r`, byte
/// `3 * i + 1` is lane `i` of `g` and byte `3 * i + 2` is lane `i` of `b`.
/// The returned tuple is that 48-byte stream cut into its first, second and
/// third 16-byte chunk.
#[inline(always)]
pub(crate) fn _mm_interleave_rgb(
    r: __m128i,
    g: __m128i,
    b: __m128i,
) -> (__m128i, __m128i, __m128i) {
    // SAFETY: every intrinsic below works on its register arguments only (no
    // pointer is dereferenced). `_mm_blendv_epi8` needs SSE4.1 and
    // `_mm_shuffle_epi8` needs SSSE3; nothing here checks for them, so callers
    // are presumably expected to have verified CPU support — TODO confirm.
    unsafe {
        // Blend masks: a set high bit selects the second blend operand.
        // `take_at_2_mod_3` fires on lanes 2, 5, 8, 11, 14 and
        // `take_at_1_mod_3` on lanes 1, 4, 7, 10, 13.
        let take_at_2_mod_3 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
        let take_at_1_mod_3 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);

        // Pre-place every sample at the lane it will occupy in whichever
        // output chunk it ends up in.
        let r_placed = _mm_shuffle_epi8(
            r,
            _mm_setr_epi8(0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5),
        );
        let g_placed = _mm_shuffle_epi8(
            g,
            _mm_setr_epi8(5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10),
        );
        let b_placed = _mm_shuffle_epi8(
            b,
            _mm_setr_epi8(10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15),
        );

        // Each output chunk takes every third lane from each placed vector;
        // the roles of the three inputs rotate from one chunk to the next.
        let chunk0 = {
            let partial = _mm_blendv_epi8(r_placed, g_placed, take_at_1_mod_3);
            _mm_blendv_epi8(partial, b_placed, take_at_2_mod_3)
        };
        let chunk1 = {
            let partial = _mm_blendv_epi8(g_placed, b_placed, take_at_1_mod_3);
            _mm_blendv_epi8(partial, r_placed, take_at_2_mod_3)
        };
        let chunk2 = {
            let partial = _mm_blendv_epi8(b_placed, r_placed, take_at_1_mod_3);
            _mm_blendv_epi8(partial, g_placed, take_at_2_mod_3)
        };

        (chunk0, chunk1, chunk2)
    }
}
/// Interleaves four planar byte vectors into 64 bytes of packed quadruples.
///
/// In the concatenated output, bytes `4 * i .. 4 * i + 4` are lane `i` of
/// `r`, `g`, `b` and `a` respectively. The returned tuple is that 64-byte
/// stream cut into four consecutive 16-byte chunks (pixels 0-3, 4-7, 8-11
/// and 12-15).
#[inline(always)]
pub(crate) fn _mm_interleave_rgba(
    r: __m128i,
    g: __m128i,
    b: __m128i,
    a: __m128i,
) -> (__m128i, __m128i, __m128i, __m128i) {
    // SAFETY: only register-to-register unpack intrinsics are used; no pointer
    // is dereferenced.
    unsafe {
        // Lower eight pixels: zip (r, g) and (b, a) bytes, then zip the
        // resulting 16-bit pairs into full 4-byte pixels.
        let rg_px0_7 = _mm_unpacklo_epi8(r, g);
        let ba_px0_7 = _mm_unpacklo_epi8(b, a);
        let px_0_3 = _mm_unpacklo_epi16(rg_px0_7, ba_px0_7);
        let px_4_7 = _mm_unpackhi_epi16(rg_px0_7, ba_px0_7);

        // Upper eight pixels, same two-step zip.
        let rg_px8_15 = _mm_unpackhi_epi8(r, g);
        let ba_px8_15 = _mm_unpackhi_epi8(b, a);
        let px_8_11 = _mm_unpacklo_epi16(rg_px8_15, ba_px8_15);
        let px_12_15 = _mm_unpackhi_epi16(rg_px8_15, ba_px8_15);

        (px_0_3, px_4_7, px_8_11, px_12_15)
    }
}
/// Transposes four 4-lane `f32` vectors (4x4 transpose).
///
/// Treating `t0`..`t3` as four interleaved 4-channel pixels, the returned
/// tuple holds one vector per channel: element `k` of the tuple contains lane
/// `k` of `t0`, `t1`, `t2`, `t3` in that order.
#[inline(always)]
pub(crate) fn _mm_deinterleave_rgba_ps(
    t0: __m128,
    t1: __m128,
    t2: __m128,
    t3: __m128,
) -> (__m128, __m128, __m128, __m128) {
    // SAFETY: only register-to-register unpack intrinsics are used; no pointer
    // is dereferenced.
    unsafe {
        // First zip pairs rows (0, 2) and (1, 3) so the second zip can bring
        // all four rows together lane by lane.
        let even_rows_lanes01 = _mm_unpacklo_ps(t0, t2);
        let even_rows_lanes23 = _mm_unpackhi_ps(t0, t2);
        let odd_rows_lanes01 = _mm_unpacklo_ps(t1, t3);
        let odd_rows_lanes23 = _mm_unpackhi_ps(t1, t3);

        let lane0 = _mm_unpacklo_ps(even_rows_lanes01, odd_rows_lanes01);
        let lane1 = _mm_unpackhi_ps(even_rows_lanes01, odd_rows_lanes01);
        let lane2 = _mm_unpacklo_ps(even_rows_lanes23, odd_rows_lanes23);
        let lane3 = _mm_unpackhi_ps(even_rows_lanes23, odd_rows_lanes23);

        (lane0, lane1, lane2, lane3)
    }
}
/// Splits three `f32` vectors holding four interleaved 3-channel pixels into
/// three per-channel vectors.
///
/// NOTE(review): the lane selection depends on `_shuffle`, which is defined
/// in `crate::sse` and not visible here. Assuming it mirrors C's
/// `_MM_SHUFFLE(z, y, x, w)`, the concatenated input `t0, t1, t2` is read as
/// `c0 c1 c2 c0 c1 c2 ...` and the result is `(all c0, all c1, all c2)` in
/// pixel order — confirm against `_shuffle`'s definition.
#[inline(always)]
pub(crate) fn _mm_deinterleave_rgb_ps(
    t0: __m128,
    t1: __m128,
    t2: __m128,
) -> (__m128, __m128, __m128) {
    // Shuffle immediates, named after the `_shuffle` arguments they are built
    // from.
    const SEL_0_1_0_2: i32 = _shuffle(0, 1, 0, 2);
    const SEL_2_0_3_0: i32 = _shuffle(2, 0, 3, 0);
    const SEL_0_0_0_1: i32 = _shuffle(0, 0, 0, 1);
    const SEL_0_2_0_3: i32 = _shuffle(0, 2, 0, 3);
    const SEL_2_0_2_0: i32 = _shuffle(2, 0, 2, 0);
    const SEL_3_0_2_0: i32 = _shuffle(3, 0, 2, 0);

    // SAFETY: only register-to-register shuffle intrinsics are used; no
    // pointer is dereferenced.
    unsafe {
        // Intermediate mixes that bring the lanes needed by each output into
        // a pair of registers a single final shuffle can draw from.
        let first_tail = _mm_shuffle_ps::<SEL_0_1_0_2>(t1, t2);
        let second_head = _mm_shuffle_ps::<SEL_0_0_0_1>(t0, t1);
        let second_tail = _mm_shuffle_ps::<SEL_0_2_0_3>(t1, t2);
        let third_head = _mm_shuffle_ps::<SEL_0_1_0_2>(t0, t1);

        (
            _mm_shuffle_ps::<SEL_2_0_3_0>(t0, first_tail),
            _mm_shuffle_ps::<SEL_2_0_2_0>(second_head, second_tail),
            _mm_shuffle_ps::<SEL_3_0_2_0>(third_head, t2),
        )
    }
}
/// Interleaves three per-channel `f32` vectors into three vectors of packed
/// 3-channel pixels.
///
/// NOTE(review): the lane selection depends on `_shuffle`, which is defined
/// in `crate::sse` and not visible here. Assuming it mirrors C's
/// `_MM_SHUFFLE(z, y, x, w)`, the concatenated output reads
/// `t0[0] t1[0] t2[0] t0[1] t1[1] t2[1] ...` and is returned as three
/// consecutive 4-lane chunks — confirm against `_shuffle`'s definition.
#[inline(always)]
pub(crate) fn _mm_interleave_rgb_ps(
    t0: __m128,
    t1: __m128,
    t2: __m128,
) -> (__m128, __m128, __m128) {
    // Shuffle immediates, named after the `_shuffle` arguments they are built
    // from.
    const SEL_0_0_0_0: i32 = _shuffle(0, 0, 0, 0);
    const SEL_1_1_0_0: i32 = _shuffle(1, 1, 0, 0);
    const SEL_1_1_1_1: i32 = _shuffle(1, 1, 1, 1);
    const SEL_2_2_2_2: i32 = _shuffle(2, 2, 2, 2);
    const SEL_3_3_2_2: i32 = _shuffle(3, 3, 2, 2);
    const SEL_3_3_3_3: i32 = _shuffle(3, 3, 3, 3);
    // Shared by all three final merges.
    const SEL_2_0_2_0: i32 = _shuffle(2, 0, 2, 0);

    // SAFETY: only register-to-register shuffle intrinsics are used; no
    // pointer is dereferenced.
    unsafe {
        // Every output chunk is assembled from two intermediate mixes that
        // are then merged by the same final shuffle.
        let chunk0 = {
            let head = _mm_shuffle_ps::<SEL_0_0_0_0>(t0, t1);
            let tail = _mm_shuffle_ps::<SEL_1_1_0_0>(t2, t0);
            _mm_shuffle_ps::<SEL_2_0_2_0>(head, tail)
        };
        let chunk1 = {
            let head = _mm_shuffle_ps::<SEL_1_1_1_1>(t1, t2);
            let tail = _mm_shuffle_ps::<SEL_2_2_2_2>(t0, t1);
            _mm_shuffle_ps::<SEL_2_0_2_0>(head, tail)
        };
        let chunk2 = {
            let head = _mm_shuffle_ps::<SEL_3_3_2_2>(t2, t0);
            let tail = _mm_shuffle_ps::<SEL_3_3_3_3>(t1, t2);
            _mm_shuffle_ps::<SEL_2_0_2_0>(head, tail)
        };

        (chunk0, chunk1, chunk2)
    }
}
/// Transposes four 4-lane `f32` vectors (4x4 transpose).
///
/// Treating `t0`..`t3` as four per-channel vectors, the returned tuple holds
/// one packed pixel per element: element `k` of the tuple contains lane `k`
/// of `t0`, `t1`, `t2`, `t3` in that order.
#[inline(always)]
pub(crate) fn _mm_interleave_rgba_ps(
    t0: __m128,
    t1: __m128,
    t2: __m128,
    t3: __m128,
) -> (__m128, __m128, __m128, __m128) {
    // SAFETY: only register-to-register unpack intrinsics are used; no pointer
    // is dereferenced.
    unsafe {
        // Pair channel 0 with channel 2 and channel 1 with channel 3, so that
        // zipping the two pairings yields all four channels side by side.
        let ch02_px01 = _mm_unpacklo_ps(t0, t2);
        let ch02_px23 = _mm_unpackhi_ps(t0, t2);
        let ch13_px01 = _mm_unpacklo_ps(t1, t3);
        let ch13_px23 = _mm_unpackhi_ps(t1, t3);

        (
            _mm_unpacklo_ps(ch02_px01, ch13_px01),
            _mm_unpackhi_ps(ch02_px01, ch13_px01),
            _mm_unpacklo_ps(ch02_px23, ch13_px23),
            _mm_unpackhi_ps(ch02_px23, ch13_px23),
        )
    }
}