#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
/// Splits 16 interleaved 4-byte pixels into four per-channel vectors.
///
/// The inputs are treated as 64 consecutive bytes, `rgba0` first. The returned
/// tuple holds, in order, the bytes found at offset 0, 1, 2 and 3 of every
/// 4-byte group, i.e. `(r, g, b, a)` for RGBA-ordered input. Within each
/// output vector the pixels keep their original order (pixel 0 in byte 0,
/// pixel 15 in byte 15).
///
/// # Safety
///
/// The caller must ensure the executing CPU supports SSSE3, which
/// `_mm_shuffle_epi8` requires.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
#[allow(dead_code)]
pub unsafe fn sse_deinterleave_rgba(
    rgba0: __m128i,
    rgba1: __m128i,
    rgba2: __m128i,
    rgba3: __m128i,
) -> (__m128i, __m128i, __m128i, __m128i) {
    // Byte permutation that transposes a 4x4 byte matrix stored row by row.
    #[rustfmt::skip]
    let transpose_4x4 = _mm_setr_epi8(
        0, 4,  8, 12,
        1, 5,  9, 13,
        2, 6, 10, 14,
        3, 7, 11, 15,
    );

    // Stage 1 (8-bit interleave): each byte of pixel `p` lands next to the
    // same byte of pixel `p + 4`.
    let pair01_lo = _mm_unpacklo_epi8(rgba0, rgba1);
    let pair01_hi = _mm_unpackhi_epi8(rgba0, rgba1);
    let pair23_lo = _mm_unpacklo_epi8(rgba2, rgba3);
    let pair23_hi = _mm_unpackhi_epi8(rgba2, rgba3);

    // Stage 2 (16-bit interleave): `quadN` has one 32-bit lane per channel,
    // each lane holding that channel for pixels N, N + 4, N + 8 and N + 12.
    let quad0 = _mm_unpacklo_epi16(pair01_lo, pair23_lo);
    let quad1 = _mm_unpackhi_epi16(pair01_lo, pair23_lo);
    let quad2 = _mm_unpacklo_epi16(pair01_hi, pair23_hi);
    let quad3 = _mm_unpackhi_epi16(pair01_hi, pair23_hi);

    // Stage 3 (32-bit interleave): separate the first two channels from the
    // last two, for the even-start (0, 2) and odd-start (1, 3) pixel groups.
    let rg_even = _mm_unpacklo_epi32(quad0, quad2);
    let ba_even = _mm_unpackhi_epi32(quad0, quad2);
    let rg_odd = _mm_unpacklo_epi32(quad1, quad3);
    let ba_odd = _mm_unpackhi_epi32(quad1, quad3);

    // Stage 4: a final 32-bit interleave isolates a single channel per vector,
    // but in the order 0,4,8,12, 1,5,9,13, ...; the transpose restores 0..15.
    let r = _mm_shuffle_epi8(_mm_unpacklo_epi32(rg_even, rg_odd), transpose_4x4);
    let g = _mm_shuffle_epi8(_mm_unpackhi_epi32(rg_even, rg_odd), transpose_4x4);
    let b = _mm_shuffle_epi8(_mm_unpacklo_epi32(ba_even, ba_odd), transpose_4x4);
    let a = _mm_shuffle_epi8(_mm_unpackhi_epi32(ba_even, ba_odd), transpose_4x4);

    (r, g, b, a)
}
/// Packs four per-channel vectors into 16 interleaved 4-byte pixels.
///
/// Read in tuple order, the four returned vectors contain the byte sequence
/// `r[i], g[i], b[i], a[i]` for `i` in `0..16`: the first vector holds
/// pixels 0-3, the second pixels 4-7, the third pixels 8-11 and the fourth
/// pixels 12-15.
///
/// # Safety
///
/// The caller must ensure the executing CPU supports SSE2, which the unpack
/// intrinsics used here require.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
#[allow(dead_code)]
pub unsafe fn sse_interleave_rgba(
    r: __m128i,
    g: __m128i,
    b: __m128i,
    a: __m128i,
) -> (__m128i, __m128i, __m128i, __m128i) {
    // Byte-wise pairing: [r0 g0 r1 g1 ...] and [b0 a0 b1 a1 ...], split into
    // the halves covering pixels 0-7 and pixels 8-15.
    let (rg_front, rg_back) = (_mm_unpacklo_epi8(r, g), _mm_unpackhi_epi8(r, g));
    let (ba_front, ba_back) = (_mm_unpacklo_epi8(b, a), _mm_unpackhi_epi8(b, a));

    // Interleaving the 16-bit (r, g) and (b, a) pairs produces whole pixels.
    (
        _mm_unpacklo_epi16(rg_front, ba_front),
        _mm_unpackhi_epi16(rg_front, ba_front),
        _mm_unpacklo_epi16(rg_back, ba_back),
        _mm_unpackhi_epi16(rg_back, ba_back),
    )
}