colorutils_rs/avx/
utils.rs

1/*
2 * // Copyright 2024 (c) the Radzivon Bartoshyk. All rights reserved.
3 * //
4 * // Use of this source code is governed by a BSD-style
5 * // license that can be found in the LICENSE file.
6 */
7
8use crate::avx::shuffle;
9#[cfg(target_arch = "x86")]
10use std::arch::x86::*;
11#[cfg(target_arch = "x86_64")]
12use std::arch::x86_64::*;
13
/// Interleaves three planar 8-lane `f32` vectors into packed 3-element groups.
///
/// Reading the three returned vectors back to back yields
/// `a0, b0, c0, a1, b1, c1, ..., a7, b7, c7`.
///
/// # Safety
///
/// The body consists of AVX/AVX2 intrinsics, so this must only be called on a CPU that
/// supports AVX2.
#[inline(always)]
pub unsafe fn avx2_interleave_rgb_ps(a: __m256, b: __m256, c: __m256) -> (__m256, __m256, __m256) {
    // Dword orders applied inside each 128-bit lane of the three inputs, chosen so that the
    // blends below can pick every output element from a fixed position.
    const ORDER_A: i32 = 0x6c;
    const ORDER_B: i32 = 0xb1;
    const ORDER_C: i32 = 0xc6;
    // Blend selectors: bits 1, 4, 7 take the second operand; bits 2, 5 take the third.
    const PICK_SECOND: i32 = 0x92;
    const PICK_THIRD: i32 = 0x24;
    // 128-bit half selectors: low halves of both operands / high halves of both operands.
    const LOW_HALVES: i32 = 32;
    const HIGH_HALVES: i32 = 49;

    // Work on the raw bits; the integer shuffles and blends never touch the float values.
    let rot_a = _mm256_shuffle_epi32::<ORDER_A>(_mm256_castps_si256(a));
    let rot_b = _mm256_shuffle_epi32::<ORDER_B>(_mm256_castps_si256(b));
    let rot_c = _mm256_shuffle_epi32::<ORDER_C>(_mm256_castps_si256(c));

    // `lows` holds the low half of the first and of the last output vector.
    let lows = {
        let ab = _mm256_blend_epi32::<PICK_SECOND>(rot_a, rot_b);
        _mm256_blend_epi32::<PICK_THIRD>(ab, rot_c)
    };
    // `highs` holds the high half of the first and of the last output vector.
    let highs = {
        let bc = _mm256_blend_epi32::<PICK_SECOND>(rot_b, rot_c);
        _mm256_blend_epi32::<PICK_THIRD>(bc, rot_a)
    };
    // `middle` already is the second output vector.
    let middle = {
        let ca = _mm256_blend_epi32::<PICK_SECOND>(rot_c, rot_a);
        _mm256_blend_epi32::<PICK_THIRD>(ca, rot_b)
    };

    let head = _mm256_permute2x128_si256::<LOW_HALVES>(lows, highs);
    let tail = _mm256_permute2x128_si256::<HIGH_HALVES>(lows, highs);

    (
        _mm256_castsi256_ps(head),
        _mm256_castsi256_ps(middle),
        _mm256_castsi256_ps(tail),
    )
}
32
33#[inline]
34#[target_feature(enable = "avx2")]
35pub unsafe fn _mm256_packus_four_epi32(a: __m256i, b: __m256i, c: __m256i, d: __m256i) -> __m256i {
36    let ab = _mm256_packs_epi32(a, b);
37    let cd = _mm256_packs_epi32(c, d);
38
39    const MASK: i32 = shuffle(3, 1, 2, 0);
40
41    let abcd = _mm256_permute4x64_epi64::<MASK>(_mm256_packus_epi16(ab, cd));
42    _mm256_shuffle_epi32::<MASK>(abcd)
43}