#![allow(dead_code)]
use std::arch::x86_64::*;
/// Splits 32 interleaved four-byte groups into four planar byte vectors.
///
/// Reading `rgba0`, `rgba1`, `rgba2`, `rgba3` as one 128-byte stream, element `k`
/// of the `n`-th returned vector is stream byte `4 * k + n` (`n` in `0..4`).
///
/// NOTE(review): the AVX2 intrinsics are wrapped in a safe function, so callers
/// presumably establish AVX2 support before reaching it - confirm.
#[inline(always)]
pub(crate) fn _mm256_deinterleave_rgba_epi8(
    rgba0: __m256i,
    rgba1: __m256i,
    rgba2: __m256i,
    rgba3: __m256i,
) -> (__m256i, __m256i, __m256i, __m256i) {
    // `_mm256_permute2x128_si256` selectors: combine the low (0x20) or the
    // high (0x31) 128-bit halves of the two operands.
    const LOW_HALVES: i32 = 0x20;
    const HIGH_HALVES: i32 = 0x31;

    // SAFETY: only register-level intrinsics are used and no pointer is dereferenced.
    // NOTE(review): soundness presumes an AVX2-capable CPU - confirm at the call sites.
    unsafe {
        // Per 128-bit lane: collect byte 0 of each of the four groups into dword 0,
        // byte 1 into dword 1, and so on.
        #[rustfmt::skip]
        let gather = _mm256_setr_epi8(
            0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
            0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
        );
        let q0 = _mm256_shuffle_epi8(rgba0, gather);
        let q1 = _mm256_shuffle_epi8(rgba1, gather);
        let q2 = _mm256_shuffle_epi8(rgba2, gather);
        let q3 = _mm256_shuffle_epi8(rgba3, gather);

        // Pair up the per-channel dwords of neighbouring registers.
        let lo01 = _mm256_unpacklo_epi32(q0, q1);
        let lo23 = _mm256_unpacklo_epi32(q2, q3);
        let hi01 = _mm256_unpackhi_epi32(q0, q1);
        let hi23 = _mm256_unpackhi_epi32(q2, q3);

        // Bring matching 128-bit halves together so the final unpack yields
        // elements in ascending stream order.
        let lo_a = _mm256_permute2x128_si256::<LOW_HALVES>(lo01, lo23);
        let lo_b = _mm256_permute2x128_si256::<HIGH_HALVES>(lo01, lo23);
        let hi_a = _mm256_permute2x128_si256::<LOW_HALVES>(hi01, hi23);
        let hi_b = _mm256_permute2x128_si256::<HIGH_HALVES>(hi01, hi23);

        (
            _mm256_unpacklo_epi32(lo_a, lo_b),
            _mm256_unpackhi_epi32(lo_a, lo_b),
            _mm256_unpacklo_epi32(hi_a, hi_b),
            _mm256_unpackhi_epi32(hi_a, hi_b),
        )
    }
}
/// Interleaves four planar byte vectors into 32 consecutive four-byte groups.
///
/// Group `k` of the output stream is `(vals.0[k], vals.1[k], vals.2[k], vals.3[k])`.
/// The returned vectors hold groups 0-7, 8-15, 16-23 and 24-31, in that order.
///
/// NOTE(review): the AVX2 intrinsics are wrapped in a safe function, so callers
/// presumably establish AVX2 support before reaching it - confirm.
#[inline(always)]
pub(crate) fn _mm256_interleave_rgba_epi8(
    vals: (__m256i, __m256i, __m256i, __m256i),
) -> (__m256i, __m256i, __m256i, __m256i) {
    // `_mm256_permute2x128_si256` selectors: combine the low (0x20) or the
    // high (0x31) 128-bit halves of the two operands.
    const LOW_HALVES: i32 = 0x20;
    const HIGH_HALVES: i32 = 0x31;

    let (c0, c1, c2, c3) = vals;

    // SAFETY: only register-level intrinsics are used and no pointer is dereferenced.
    // NOTE(review): soundness presumes an AVX2-capable CPU - confirm at the call sites.
    unsafe {
        // Byte pairs (c0, c1) and (c2, c3), split by lane-local low/high halves.
        let pair01_lo = _mm256_unpacklo_epi8(c0, c1);
        let pair23_lo = _mm256_unpacklo_epi8(c2, c3);
        let pair01_hi = _mm256_unpackhi_epi8(c0, c1);
        let pair23_hi = _mm256_unpackhi_epi8(c2, c3);

        // Full four-byte groups; each register still mixes groups from both lanes.
        let quad_a = _mm256_unpacklo_epi16(pair01_lo, pair23_lo);
        let quad_b = _mm256_unpackhi_epi16(pair01_lo, pair23_lo);
        let quad_c = _mm256_unpacklo_epi16(pair01_hi, pair23_hi);
        let quad_d = _mm256_unpackhi_epi16(pair01_hi, pair23_hi);

        // Reassemble the 128-bit halves into ascending group order.
        (
            _mm256_permute2x128_si256::<LOW_HALVES>(quad_a, quad_b),
            _mm256_permute2x128_si256::<LOW_HALVES>(quad_c, quad_d),
            _mm256_permute2x128_si256::<HIGH_HALVES>(quad_a, quad_b),
            _mm256_permute2x128_si256::<HIGH_HALVES>(quad_c, quad_d),
        )
    }
}
/// Interleaves three planar byte vectors into 32 consecutive three-byte groups.
///
/// Group `k` of the 96-byte output stream is `(r[k], g[k], b[k])`; the three
/// returned vectors are the first, second and third 32 bytes of that stream.
///
/// NOTE(review): the AVX2 intrinsics are wrapped in a safe function, so callers
/// presumably establish AVX2 support before reaching it - confirm.
#[inline(always)]
pub(crate) fn _mm256_interleave_rgb(
    r: __m256i,
    g: __m256i,
    b: __m256i,
) -> (__m256i, __m256i, __m256i) {
    // SAFETY: only register-level intrinsics are used and no pointer is dereferenced.
    // NOTE(review): soundness presumes an AVX2-capable CPU - confirm at the call sites.
    unsafe {
        // Lane-local permutations that move every source byte to the offset it
        // occupies (modulo 16) in the interleaved stream.
        #[rustfmt::skip]
        let place_first = _mm256_setr_epi8(
            0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5,
            0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5,
        );
        #[rustfmt::skip]
        let place_second = _mm256_setr_epi8(
            5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10,
            5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10,
        );
        #[rustfmt::skip]
        let place_third = _mm256_setr_epi8(
            10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15,
            10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15,
        );
        let first = _mm256_shuffle_epi8(r, place_first);
        let second = _mm256_shuffle_epi8(g, place_second);
        let third = _mm256_shuffle_epi8(b, place_third);

        // Byte-select masks: `sel_1` marks offsets 1, 4, 7, ... and `sel_2`
        // marks offsets 2, 5, 8, ... inside each 128-bit lane.
        #[rustfmt::skip]
        let sel_1 = _mm256_setr_epi8(
            0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
            0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
        );
        #[rustfmt::skip]
        let sel_2 = _mm256_setr_epi8(
            0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
            0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
        );

        // Each chunk is a three-way merge: start from one source, then overwrite
        // the `sel_1` and `sel_2` offsets from the other two.
        let chunk0_partial = _mm256_blendv_epi8(first, second, sel_1);
        let chunk0 = _mm256_blendv_epi8(chunk0_partial, third, sel_2);
        let chunk1_partial = _mm256_blendv_epi8(second, third, sel_1);
        let chunk1 = _mm256_blendv_epi8(chunk1_partial, first, sel_2);
        let chunk2_partial = _mm256_blendv_epi8(third, first, sel_1);
        let chunk2 = _mm256_blendv_epi8(chunk2_partial, second, sel_2);

        // The low lanes hold stream bytes 0..48 and the high lanes bytes 48..96;
        // stitch the 128-bit halves back together in stream order.
        (
            _mm256_permute2x128_si256::<0x20>(chunk0, chunk1),
            _mm256_permute2x128_si256::<0x30>(chunk2, chunk0),
            _mm256_permute2x128_si256::<0x31>(chunk1, chunk2),
        )
    }
}
/// Splits 32 interleaved three-byte groups into three planar byte vectors.
///
/// Reading `rgb0`, `rgb1`, `rgb2` as one 96-byte stream, element `k` of the
/// `n`-th returned vector is stream byte `3 * k + n` (`n` in `0..3`).
///
/// NOTE(review): the AVX2 intrinsics are wrapped in a safe function, so callers
/// presumably establish AVX2 support before reaching it - confirm.
#[inline(always)]
pub(crate) fn _mm256_deinterleave_rgb(
    rgb0: __m256i,
    rgb1: __m256i,
    rgb2: __m256i,
) -> (__m256i, __m256i, __m256i) {
    // `_mm256_permute2x128_si256` selectors: combine the low (0x20) or the
    // high (0x31) 128-bit halves of the two operands.
    const LOW_HALVES: i32 = 0x20;
    const HIGH_HALVES: i32 = 0x31;

    // SAFETY: only register-level intrinsics are used and no pointer is dereferenced.
    // NOTE(review): soundness presumes an AVX2-capable CPU - confirm at the call sites.
    unsafe {
        // Regroup so each 128-bit lane position sees three consecutive 16-byte
        // chunks of the stream across (outer_lo, outer_hi, rgb1).
        let outer_lo = _mm256_permute2x128_si256::<LOW_HALVES>(rgb0, rgb2);
        let outer_hi = _mm256_permute2x128_si256::<HIGH_HALVES>(rgb0, rgb2);

        // Byte-select masks; the two lanes use different phases because the
        // three-byte period does not divide the 16-byte lane width.
        #[rustfmt::skip]
        let pick_a = _mm256_setr_epi8(
            0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
            0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
        );
        #[rustfmt::skip]
        let pick_b = _mm256_setr_epi8(
            0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
            -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1,
        );

        // Collect all bytes of one channel into a single register (still scrambled).
        let ch0_partial = _mm256_blendv_epi8(outer_lo, outer_hi, pick_a);
        let ch0_mixed = _mm256_blendv_epi8(ch0_partial, rgb1, pick_b);
        let ch1_partial = _mm256_blendv_epi8(outer_hi, outer_lo, pick_b);
        let ch1_mixed = _mm256_blendv_epi8(ch1_partial, rgb1, pick_a);
        let ch2_partial = _mm256_blendv_epi8(rgb1, outer_lo, pick_a);
        let ch2_mixed = _mm256_blendv_epi8(ch2_partial, outer_hi, pick_b);

        // Lane-local permutations that put each channel's bytes in ascending order.
        #[rustfmt::skip]
        let order0 = _mm256_setr_epi8(
            0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13,
            0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13,
        );
        #[rustfmt::skip]
        let order1 = _mm256_setr_epi8(
            1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14,
            1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14,
        );
        #[rustfmt::skip]
        let order2 = _mm256_setr_epi8(
            2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15,
            2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15,
        );

        (
            _mm256_shuffle_epi8(ch0_mixed, order0),
            _mm256_shuffle_epi8(ch1_mixed, order1),
            _mm256_shuffle_epi8(ch2_mixed, order2),
        )
    }
}
/// Splits eight interleaved groups of four 32-bit elements into four planar vectors.
///
/// Reading `vals.0 .. vals.3` as one stream of 32 dwords, element `k` of the
/// `n`-th returned vector is stream dword `4 * k + n` (`n` in `0..4`).
///
/// NOTE(review): the AVX2 intrinsics are wrapped in a safe function, so callers
/// presumably establish AVX2 support before reaching it - confirm.
#[inline(always)]
pub(crate) fn _mm256_deinterleave_rgba_epi32(
    vals: (__m256i, __m256i, __m256i, __m256i),
) -> (__m256i, __m256i, __m256i, __m256i) {
    // `_mm256_permute2x128_si256` selectors: combine the low (0x20) or the
    // high (0x31) 128-bit halves of the two operands.
    const LOW_HALVES: i32 = 0x20;
    const HIGH_HALVES: i32 = 0x31;

    let (s0, s1, s2, s3) = vals;

    // SAFETY: only register-level intrinsics are used and no pointer is dereferenced.
    // NOTE(review): soundness presumes an AVX2-capable CPU - confirm at the call sites.
    unsafe {
        // Pair same-position dwords of neighbouring registers.
        let lo01 = _mm256_unpacklo_epi32(s0, s1);
        let lo23 = _mm256_unpacklo_epi32(s2, s3);
        let hi01 = _mm256_unpackhi_epi32(s0, s1);
        let hi23 = _mm256_unpackhi_epi32(s2, s3);

        // After this step `*_even` carries groups 0, 2, 4, 6 and `*_odd`
        // carries groups 1, 3, 5, 7.
        let lo_even = _mm256_permute2x128_si256::<LOW_HALVES>(lo01, lo23);
        let lo_odd = _mm256_permute2x128_si256::<HIGH_HALVES>(lo01, lo23);
        let hi_even = _mm256_permute2x128_si256::<LOW_HALVES>(hi01, hi23);
        let hi_odd = _mm256_permute2x128_si256::<HIGH_HALVES>(hi01, hi23);

        // Zipping even with odd groups restores ascending order per channel.
        (
            _mm256_unpacklo_epi32(lo_even, lo_odd),
            _mm256_unpackhi_epi32(lo_even, lo_odd),
            _mm256_unpacklo_epi32(hi_even, hi_odd),
            _mm256_unpackhi_epi32(hi_even, hi_odd),
        )
    }
}
/// Splits eight interleaved groups of three 32-bit elements into three planar vectors.
///
/// Reading `vals.0 .. vals.2` as one stream of 24 dwords, element `k` of the
/// `n`-th returned vector is stream dword `3 * k + n` (`n` in `0..3`).
///
/// NOTE(review): the AVX2 intrinsics are wrapped in a safe function, so callers
/// presumably establish AVX2 support before reaching it - confirm.
#[inline(always)]
pub(crate) fn _mm256_deinterleave_rgb_epi32(
    vals: (__m256i, __m256i, __m256i),
) -> (__m256i, __m256i, __m256i) {
    // `_mm256_permute2x128_si256` selectors: combine the low (0x20) or the
    // high (0x31) 128-bit halves of the two operands.
    const LOW_HALVES: i32 = 0x20;
    const HIGH_HALVES: i32 = 0x31;
    // `_mm256_blend_epi32` masks: a set bit takes that dword from the second operand.
    const DWORDS_2_5: i32 = 0x24;
    const DWORDS_1_4_7: i32 = 0x92;
    // `_mm256_shuffle_epi32` orders, named after the source dword feeding
    // destination dwords 0, 1, 2, 3 of every 128-bit lane.
    const ORDER_0321: i32 = 0x6c;
    const ORDER_1032: i32 = 0xb1;
    const ORDER_2103: i32 = 0xc6;

    let (s0, s1, s2) = vals;

    // SAFETY: only register-level intrinsics are used and no pointer is dereferenced.
    // NOTE(review): soundness presumes an AVX2-capable CPU - confirm at the call sites.
    unsafe {
        let outer_lo = _mm256_permute2x128_si256::<LOW_HALVES>(s0, s2);
        let outer_hi = _mm256_permute2x128_si256::<HIGH_HALVES>(s0, s2);

        // Gather every dword of one channel into a single register (lane-scrambled).
        let ch0_partial = _mm256_blend_epi32::<DWORDS_2_5>(outer_lo, outer_hi);
        let ch0_mixed = _mm256_blend_epi32::<DWORDS_1_4_7>(ch0_partial, s1);
        let ch1_partial = _mm256_blend_epi32::<DWORDS_1_4_7>(outer_hi, outer_lo);
        let ch1_mixed = _mm256_blend_epi32::<DWORDS_2_5>(ch1_partial, s1);
        let ch2_partial = _mm256_blend_epi32::<DWORDS_2_5>(s1, outer_lo);
        let ch2_mixed = _mm256_blend_epi32::<DWORDS_1_4_7>(ch2_partial, outer_hi);

        // Undo the per-lane scrambling so elements come out in ascending order.
        (
            _mm256_shuffle_epi32::<ORDER_0321>(ch0_mixed),
            _mm256_shuffle_epi32::<ORDER_1032>(ch1_mixed),
            _mm256_shuffle_epi32::<ORDER_2103>(ch2_mixed),
        )
    }
}
/// `f32` counterpart of [`_mm256_deinterleave_rgba_epi32`].
///
/// Each input vector is reinterpreted as an integer vector, passed through the
/// integer routine, and the four results are reinterpreted back as `__m256`.
/// Only cast intrinsics are applied to the values on the way in and out.
#[inline(always)]
pub(crate) fn _mm256_deinterleave_rgba_ps(
    vals: (__m256, __m256, __m256, __m256),
) -> (__m256, __m256, __m256, __m256) {
    let (f0, f1, f2, f3) = vals;

    // SAFETY: only register-level cast intrinsics are used and no pointer is dereferenced.
    // NOTE(review): soundness presumes an AVX2-capable CPU - confirm at the call sites.
    unsafe {
        let as_ints = (
            _mm256_castps_si256(f0),
            _mm256_castps_si256(f1),
            _mm256_castps_si256(f2),
            _mm256_castps_si256(f3),
        );
        let (i0, i1, i2, i3) = _mm256_deinterleave_rgba_epi32(as_ints);
        (
            _mm256_castsi256_ps(i0),
            _mm256_castsi256_ps(i1),
            _mm256_castsi256_ps(i2),
            _mm256_castsi256_ps(i3),
        )
    }
}
/// `f32` counterpart of [`_mm256_deinterleave_rgb_epi32`].
///
/// Each input vector is reinterpreted as an integer vector, passed through the
/// integer routine, and the three results are reinterpreted back as `__m256`.
/// Only cast intrinsics are applied to the values on the way in and out.
#[inline(always)]
pub(crate) fn _mm256_deinterleave_rgb_ps(
    vals: (__m256, __m256, __m256),
) -> (__m256, __m256, __m256) {
    let (f0, f1, f2) = vals;

    // SAFETY: only register-level cast intrinsics are used and no pointer is dereferenced.
    // NOTE(review): soundness presumes an AVX2-capable CPU - confirm at the call sites.
    unsafe {
        let as_ints = (
            _mm256_castps_si256(f0),
            _mm256_castps_si256(f1),
            _mm256_castps_si256(f2),
        );
        let (i0, i1, i2) = _mm256_deinterleave_rgb_epi32(as_ints);
        (
            _mm256_castsi256_ps(i0),
            _mm256_castsi256_ps(i1),
            _mm256_castsi256_ps(i2),
        )
    }
}
/// Interleaves three planar vectors of 32-bit elements into eight three-element groups.
///
/// Group `k` of the 24-dword output stream is `(vals.0[k], vals.1[k], vals.2[k])`;
/// the returned vectors are the first, second and third eight dwords of that stream.
///
/// NOTE(review): the AVX2 intrinsics are wrapped in a safe function, so callers
/// presumably establish AVX2 support before reaching it - confirm.
#[inline(always)]
pub(crate) fn _mm256_interleave_rgb_epi32(
    vals: (__m256i, __m256i, __m256i),
) -> (__m256i, __m256i, __m256i) {
    // `_mm256_permute2x128_si256` selectors: combine the low (0x20) or the
    // high (0x31) 128-bit halves of the two operands.
    const LOW_HALVES: i32 = 0x20;
    const HIGH_HALVES: i32 = 0x31;
    // `_mm256_blend_epi32` masks: a set bit takes that dword from the second operand.
    const DWORDS_2_5: i32 = 0x24;
    const DWORDS_1_4_7: i32 = 0x92;
    // `_mm256_shuffle_epi32` orders, named after the source dword feeding
    // destination dwords 0, 1, 2, 3 of every 128-bit lane.
    const ORDER_0321: i32 = 0x6c;
    const ORDER_1032: i32 = 0xb1;
    const ORDER_2103: i32 = 0xc6;

    let (c0, c1, c2) = vals;

    // SAFETY: only register-level intrinsics are used and no pointer is dereferenced.
    // NOTE(review): soundness presumes an AVX2-capable CPU - confirm at the call sites.
    unsafe {
        // Pre-rotate each channel so its elements sit at the dword offsets they
        // occupy in the interleaved stream.
        let first = _mm256_shuffle_epi32::<ORDER_0321>(c0);
        let second = _mm256_shuffle_epi32::<ORDER_1032>(c1);
        let third = _mm256_shuffle_epi32::<ORDER_2103>(c2);

        // Three-way merges: start from one channel, overwrite dwords 1/4/7 and
        // then dwords 2/5 from the other two.
        let outer_a_partial = _mm256_blend_epi32::<DWORDS_1_4_7>(first, second);
        let outer_a = _mm256_blend_epi32::<DWORDS_2_5>(outer_a_partial, third);
        let outer_b_partial = _mm256_blend_epi32::<DWORDS_1_4_7>(second, third);
        let outer_b = _mm256_blend_epi32::<DWORDS_2_5>(outer_b_partial, first);
        let middle_partial = _mm256_blend_epi32::<DWORDS_1_4_7>(third, first);
        let middle = _mm256_blend_epi32::<DWORDS_2_5>(middle_partial, second);

        // `middle` is already the second output; the outer two need their
        // 128-bit halves regrouped.
        (
            _mm256_permute2x128_si256::<LOW_HALVES>(outer_a, outer_b),
            middle,
            _mm256_permute2x128_si256::<HIGH_HALVES>(outer_a, outer_b),
        )
    }
}
/// `f32` counterpart of [`_mm256_interleave_rgb_epi32`].
///
/// Each input vector is reinterpreted as an integer vector, passed through the
/// integer routine, and the three results are reinterpreted back as `__m256`.
/// Only cast intrinsics are applied to the values on the way in and out.
#[inline(always)]
pub(crate) fn _mm256_interleave_rgb_ps(vals: (__m256, __m256, __m256)) -> (__m256, __m256, __m256) {
    let (f0, f1, f2) = vals;

    // SAFETY: only register-level cast intrinsics are used and no pointer is dereferenced.
    // NOTE(review): soundness presumes an AVX2-capable CPU - confirm at the call sites.
    unsafe {
        let as_ints = (
            _mm256_castps_si256(f0),
            _mm256_castps_si256(f1),
            _mm256_castps_si256(f2),
        );
        let (i0, i1, i2) = _mm256_interleave_rgb_epi32(as_ints);
        (
            _mm256_castsi256_ps(i0),
            _mm256_castsi256_ps(i1),
            _mm256_castsi256_ps(i2),
        )
    }
}
/// Interleaves four planar vectors of 32-bit elements into eight four-element groups.
///
/// Group `k` of the output stream is `(vals.0[k], vals.1[k], vals.2[k], vals.3[k])`.
/// The returned vectors hold groups 0-1, 2-3, 4-5 and 6-7, in that order.
///
/// NOTE(review): the AVX2 intrinsics are wrapped in a safe function, so callers
/// presumably establish AVX2 support before reaching it - confirm.
#[inline(always)]
pub(crate) fn _mm256_interleave_rgba_epi32(
    vals: (__m256i, __m256i, __m256i, __m256i),
) -> (__m256i, __m256i, __m256i, __m256i) {
    // `_mm256_permute2x128_si256` selectors: combine the low (0x20) or the
    // high (0x31) 128-bit halves of the two operands.
    const LOW_HALVES: i32 = 0x20;
    const HIGH_HALVES: i32 = 0x31;

    let (c0, c1, c2, c3) = vals;

    // SAFETY: only register-level intrinsics are used and no pointer is dereferenced.
    // NOTE(review): soundness presumes an AVX2-capable CPU - confirm at the call sites.
    unsafe {
        // Dword pairs (c0, c1) and (c2, c3), split by lane-local low/high halves.
        let pair01_lo = _mm256_unpacklo_epi32(c0, c1);
        let pair23_lo = _mm256_unpacklo_epi32(c2, c3);
        let pair01_hi = _mm256_unpackhi_epi32(c0, c1);
        let pair23_hi = _mm256_unpackhi_epi32(c2, c3);

        // One complete group per 128-bit lane: groups (0, 4), (1, 5), (2, 6), (3, 7).
        let groups_0_4 = _mm256_unpacklo_epi64(pair01_lo, pair23_lo);
        let groups_1_5 = _mm256_unpackhi_epi64(pair01_lo, pair23_lo);
        let groups_2_6 = _mm256_unpacklo_epi64(pair01_hi, pair23_hi);
        let groups_3_7 = _mm256_unpackhi_epi64(pair01_hi, pair23_hi);

        // Regroup the 128-bit halves into ascending group order.
        (
            _mm256_permute2x128_si256::<LOW_HALVES>(groups_0_4, groups_1_5),
            _mm256_permute2x128_si256::<LOW_HALVES>(groups_2_6, groups_3_7),
            _mm256_permute2x128_si256::<HIGH_HALVES>(groups_0_4, groups_1_5),
            _mm256_permute2x128_si256::<HIGH_HALVES>(groups_2_6, groups_3_7),
        )
    }
}
/// `f32` counterpart of [`_mm256_interleave_rgba_epi32`].
///
/// Each input vector is reinterpreted as an integer vector, passed through the
/// integer routine, and the four results are reinterpreted back as `__m256`.
/// Only cast intrinsics are applied to the values on the way in and out.
#[inline(always)]
pub(crate) fn _mm256_interleave_rgba_ps(
    vals: (__m256, __m256, __m256, __m256),
) -> (__m256, __m256, __m256, __m256) {
    let (f0, f1, f2, f3) = vals;

    // SAFETY: only register-level cast intrinsics are used and no pointer is dereferenced.
    // NOTE(review): soundness presumes an AVX2-capable CPU - confirm at the call sites.
    unsafe {
        let as_ints = (
            _mm256_castps_si256(f0),
            _mm256_castps_si256(f1),
            _mm256_castps_si256(f2),
            _mm256_castps_si256(f3),
        );
        let (i0, i1, i2, i3) = _mm256_interleave_rgba_epi32(as_ints);
        (
            _mm256_castsi256_ps(i0),
            _mm256_castsi256_ps(i1),
            _mm256_castsi256_ps(i2),
            _mm256_castsi256_ps(i3),
        )
    }
}