#[cfg(all(target_arch = "x86", all(target_feature = "sse2", not(target_feature = "ssse3"))))]
use core::{
arch::{
x86::{
__m128i,
_mm_set_epi16, _mm_set_epi32,
_mm_slli_epi16, _mm_srli_epi16,
_mm_slli_epi32, _mm_srli_epi32,
_mm_or_si128,
_mm_shuffle_epi32
}
}
};
#[cfg(all(target_arch = "x86", all(target_feature = "sse2", target_feature = "ssse3")))]
use core::{
arch::{
x86::{
__m128i,
_mm_set_epi8,
_mm_shuffle_epi8
}
}
};
#[cfg(all(target_arch = "x86", target_feature = "avx", target_feature = "avx2"))]
use core::{
arch::{
x86::{
__m256i,
_mm256_set_epi8,
_mm256_shuffle_epi8
}
}
};
#[cfg(all(target_arch = "x86", target_feature = "avx512f", target_feature = "avx512bw"))]
use core::{
arch::{
x86::{
__m512i,
_mm512_set_epi8, _mm512_set_epi64,
_mm512_shuffle_epi8,
_mm512_permutexvar_epi64
}
}
};
#[cfg(all(target_arch = "x86_64", all(target_feature = "sse2", not(target_feature = "ssse3"))))]
use core::{
arch::{
x86_64::{
__m128i,
_mm_set_epi16, _mm_set_epi32,
_mm_slli_epi16, _mm_srli_epi16,
_mm_slli_epi32, _mm_srli_epi32,
_mm_or_si128,
_mm_shuffle_epi32
}
}
};
#[cfg(all(target_arch = "x86_64", all(target_feature = "sse2", target_feature = "ssse3")))]
use core::{
arch::{
x86_64::{
__m128i,
_mm_set_epi8,
_mm_shuffle_epi8
}
}
};
#[cfg(all(target_arch = "x86_64", target_feature = "avx", target_feature = "avx2"))]
use core::{
arch::{
x86_64::{
__m256i,
_mm256_set_epi8,
_mm256_shuffle_epi8
}
}
};
#[cfg(all(target_arch = "x86_64", target_feature = "avx512f", target_feature = "avx512bw"))]
use core::{
arch::{
x86_64::{
__m512i,
_mm512_set_epi8, _mm512_set_epi64,
_mm512_shuffle_epi8,
_mm512_permutexvar_epi64
}
}
};
/// Swaps the two bytes inside every 16-bit element of `vector`.
///
/// SSE2-only fallback: each 16-bit element is shifted right by 8 and left by
/// 8, and the two results are OR-ed together, which exchanges its bytes.
///
/// # Safety
///
/// Only compiled when SSE2 (without SSSE3) is enabled at build time; the CPU
/// executing the call must support SSE2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(target_feature = "ssse3")))]
#[inline]
pub unsafe fn _mm_vrev16_epi8(vector: __m128i) -> __m128i {
    let high_byte_moved_down = _mm_srli_epi16::<8>(vector);
    let low_byte_moved_up = _mm_slli_epi16::<8>(vector);
    _mm_or_si128(high_byte_moved_down, low_byte_moved_up)
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(target_feature = "ssse3")))]
#[inline]
pub unsafe fn _mm_vrev32_epi8(vector: __m128i) -> __m128i {
return _mm_or_si128(
_mm_srli_epi32::<0x00000010>(_mm_vrev16_epi8(vector)),
_mm_slli_epi32::<0x00000010>(_mm_vrev16_epi8(vector))
);
}
/// Swaps the two 16-bit halves inside every 32-bit element of `vector`.
///
/// SSE2-only fallback: each 32-bit element is shifted right by 16 and left by
/// 16, and the two results are OR-ed together.
///
/// # Safety
///
/// Only compiled when SSE2 (without SSSE3) is enabled at build time; the CPU
/// executing the call must support SSE2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(target_feature = "ssse3")))]
#[inline]
pub unsafe fn _mm_vrev32_epi16(vector: __m128i) -> __m128i {
    let upper_half_moved_down = _mm_srli_epi32::<16>(vector);
    let lower_half_moved_up = _mm_slli_epi32::<16>(vector);
    _mm_or_si128(upper_half_moved_down, lower_half_moved_up)
}
/// Reverses the eight bytes inside every 64-bit element of `vector`.
///
/// SSE2-only fallback: the bytes of each 32-bit element are reversed first,
/// then the two 32-bit elements of every 64-bit half are exchanged.
///
/// # Safety
///
/// Only compiled when SSE2 (without SSSE3) is enabled at build time; the CPU
/// executing the call must support SSE2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(target_feature = "ssse3")))]
#[inline]
pub unsafe fn _mm_vrev64_epi8(vector: __m128i) -> __m128i {
    // 0xB1 == 0b10_11_00_01: output dwords 0..=3 read input dwords 1, 0, 3, 2.
    const SWAP_ADJACENT_DWORDS: i32 = 0xB1;
    let dword_bytes_reversed = _mm_vrev32_epi8(vector);
    _mm_shuffle_epi32::<SWAP_ADJACENT_DWORDS>(dword_bytes_reversed)
}
/// Reverses the four 16-bit elements inside every 64-bit half of `vector`.
///
/// SSE2-only fallback: adjacent 32-bit elements are exchanged first, then the
/// two 16-bit halves of every 32-bit element are swapped with shifts and an OR.
///
/// # Safety
///
/// Only compiled when SSE2 (without SSSE3) is enabled at build time; the CPU
/// executing the call must support SSE2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(target_feature = "ssse3")))]
#[inline]
pub unsafe fn _mm_vrev64_epi16(vector: __m128i) -> __m128i {
    // 0xB1 == 0b10_11_00_01: output dwords 0..=3 read input dwords 1, 0, 3, 2.
    const SWAP_ADJACENT_DWORDS: i32 = 0xB1;
    let dwords_swapped = _mm_shuffle_epi32::<SWAP_ADJACENT_DWORDS>(vector);
    let upper_half_moved_down = _mm_srli_epi32::<16>(dwords_swapped);
    let lower_half_moved_up = _mm_slli_epi32::<16>(dwords_swapped);
    _mm_or_si128(upper_half_moved_down, lower_half_moved_up)
}
/// Swaps the two 32-bit elements inside every 64-bit half of `vector`.
///
/// SSE2-only fallback implemented with a single 32-bit element shuffle.
///
/// # Safety
///
/// Only compiled when SSE2 (without SSSE3) is enabled at build time; the CPU
/// executing the call must support SSE2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(target_feature = "ssse3")))]
#[inline]
pub unsafe fn _mm_vrev64_epi32(vector: __m128i) -> __m128i {
    // 0xB1 == 0b10_11_00_01: output dwords 0..=3 read input dwords 1, 0, 3, 2.
    const SWAP_ADJACENT_DWORDS: i32 = 0xB1;
    _mm_shuffle_epi32::<SWAP_ADJACENT_DWORDS>(vector)
}
/// Swaps the two bytes inside every 16-bit element of `vector`.
///
/// SSSE3 variant: a single byte shuffle whose table maps output byte `i` to
/// input byte `i ^ 1`.
///
/// # Safety
///
/// Only compiled when SSE2 and SSSE3 are enabled at build time; the CPU
/// executing the call must support SSSE3.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", target_feature = "ssse3"))]
#[inline]
pub unsafe fn _mm_vrev16_epi8(vector: __m128i) -> __m128i {
    // `_mm_set_epi8` lists bytes from the highest (15) down to the lowest (0).
    let byte_order = _mm_set_epi8(
        0x0E, 0x0F, 0x0C, 0x0D, 0x0A, 0x0B, 0x08, 0x09,
        0x06, 0x07, 0x04, 0x05, 0x02, 0x03, 0x00, 0x01,
    );
    _mm_shuffle_epi8(vector, byte_order)
}
/// Reverses the four bytes inside every 32-bit element of `vector`.
///
/// SSSE3 variant: a single byte shuffle whose table maps output byte `i` to
/// input byte `i ^ 3`.
///
/// # Safety
///
/// Only compiled when SSE2 and SSSE3 are enabled at build time; the CPU
/// executing the call must support SSSE3.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", target_feature = "ssse3"))]
#[inline]
pub unsafe fn _mm_vrev32_epi8(vector: __m128i) -> __m128i {
    // `_mm_set_epi8` lists bytes from the highest (15) down to the lowest (0).
    let byte_order = _mm_set_epi8(
        0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B,
        0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
    );
    _mm_shuffle_epi8(vector, byte_order)
}
/// Swaps the two 16-bit halves inside every 32-bit element of `vector`.
///
/// SSSE3 variant: a single byte shuffle whose table maps output byte `i` to
/// input byte `i ^ 2`.
///
/// # Safety
///
/// Only compiled when SSE2 and SSSE3 are enabled at build time; the CPU
/// executing the call must support SSSE3.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", target_feature = "ssse3"))]
#[inline]
pub unsafe fn _mm_vrev32_epi16(vector: __m128i) -> __m128i {
    // `_mm_set_epi8` lists bytes from the highest (15) down to the lowest (0).
    let byte_order = _mm_set_epi8(
        0x0D, 0x0C, 0x0F, 0x0E, 0x09, 0x08, 0x0B, 0x0A,
        0x05, 0x04, 0x07, 0x06, 0x01, 0x00, 0x03, 0x02,
    );
    _mm_shuffle_epi8(vector, byte_order)
}
/// Reverses the eight bytes inside every 64-bit element of `vector`.
///
/// SSSE3 variant: a single byte shuffle whose table maps output byte `i` to
/// input byte `i ^ 7`.
///
/// # Safety
///
/// Only compiled when SSE2 and SSSE3 are enabled at build time; the CPU
/// executing the call must support SSSE3.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", target_feature = "ssse3"))]
#[inline]
pub unsafe fn _mm_vrev64_epi8(vector: __m128i) -> __m128i {
    // `_mm_set_epi8` lists bytes from the highest (15) down to the lowest (0).
    let byte_order = _mm_set_epi8(
        0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    );
    _mm_shuffle_epi8(vector, byte_order)
}
/// Reverses the four 16-bit elements inside every 64-bit half of `vector`.
///
/// SSSE3 variant: a single byte shuffle whose table maps output byte `i` to
/// input byte `i ^ 6`.
///
/// # Safety
///
/// Only compiled when SSE2 and SSSE3 are enabled at build time; the CPU
/// executing the call must support SSSE3.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", target_feature = "ssse3"))]
#[inline]
pub unsafe fn _mm_vrev64_epi16(vector: __m128i) -> __m128i {
    // `_mm_set_epi8` lists bytes from the highest (15) down to the lowest (0).
    let byte_order = _mm_set_epi8(
        0x09, 0x08, 0x0B, 0x0A, 0x0D, 0x0C, 0x0F, 0x0E,
        0x01, 0x00, 0x03, 0x02, 0x05, 0x04, 0x07, 0x06,
    );
    _mm_shuffle_epi8(vector, byte_order)
}
/// Swaps the two 32-bit elements inside every 64-bit half of `vector`.
///
/// SSSE3 variant: a single byte shuffle whose table maps output byte `i` to
/// input byte `i ^ 4`.
///
/// # Safety
///
/// Only compiled when SSE2 and SSSE3 are enabled at build time; the CPU
/// executing the call must support SSSE3.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", target_feature = "ssse3"))]
#[inline]
pub unsafe fn _mm_vrev64_epi32(vector: __m128i) -> __m128i {
    // `_mm_set_epi8` lists bytes from the highest (15) down to the lowest (0).
    let byte_order = _mm_set_epi8(
        0x0B, 0x0A, 0x09, 0x08, 0x0F, 0x0E, 0x0D, 0x0C,
        0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04,
    );
    _mm_shuffle_epi8(vector, byte_order)
}
/// Swaps the two bytes inside every 16-bit element of `vector`.
///
/// AVX2 byte shuffle whose table maps output byte `i` to input byte `i ^ 1`.
///
/// # Safety
///
/// Only compiled when AVX and AVX2 are enabled at build time; the CPU
/// executing the call must support AVX2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx", target_feature = "avx2"))]
#[inline]
pub unsafe fn _mm256_vrev16_epi8(vector: __m256i) -> __m256i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm256_set_epi8(
        0x1E, 0x1F, 0x1C, 0x1D, 0x1A, 0x1B, 0x18, 0x19,
        0x16, 0x17, 0x14, 0x15, 0x12, 0x13, 0x10, 0x11,
        0x0E, 0x0F, 0x0C, 0x0D, 0x0A, 0x0B, 0x08, 0x09,
        0x06, 0x07, 0x04, 0x05, 0x02, 0x03, 0x00, 0x01,
    );
    _mm256_shuffle_epi8(vector, byte_order)
}
/// Reverses the four bytes inside every 32-bit element of `vector`.
///
/// AVX2 byte shuffle whose table maps output byte `i` to input byte `i ^ 3`.
///
/// # Safety
///
/// Only compiled when AVX and AVX2 are enabled at build time; the CPU
/// executing the call must support AVX2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx", target_feature = "avx2"))]
#[inline]
pub unsafe fn _mm256_vrev32_epi8(vector: __m256i) -> __m256i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm256_set_epi8(
        0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B,
        0x14, 0x15, 0x16, 0x17, 0x10, 0x11, 0x12, 0x13,
        0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B,
        0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
    );
    _mm256_shuffle_epi8(vector, byte_order)
}
/// Swaps the two 16-bit halves inside every 32-bit element of `vector`.
///
/// AVX2 byte shuffle whose table maps output byte `i` to input byte `i ^ 2`.
///
/// # Safety
///
/// Only compiled when AVX and AVX2 are enabled at build time; the CPU
/// executing the call must support AVX2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx", target_feature = "avx2"))]
#[inline]
pub unsafe fn _mm256_vrev32_epi16(vector: __m256i) -> __m256i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm256_set_epi8(
        0x1D, 0x1C, 0x1F, 0x1E, 0x19, 0x18, 0x1B, 0x1A,
        0x15, 0x14, 0x17, 0x16, 0x11, 0x10, 0x13, 0x12,
        0x0D, 0x0C, 0x0F, 0x0E, 0x09, 0x08, 0x0B, 0x0A,
        0x05, 0x04, 0x07, 0x06, 0x01, 0x00, 0x03, 0x02,
    );
    _mm256_shuffle_epi8(vector, byte_order)
}
/// Reverses the eight bytes inside every 64-bit element of `vector`.
///
/// AVX2 byte shuffle whose table maps output byte `i` to input byte `i ^ 7`.
///
/// # Safety
///
/// Only compiled when AVX and AVX2 are enabled at build time; the CPU
/// executing the call must support AVX2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx", target_feature = "avx2"))]
#[inline]
pub unsafe fn _mm256_vrev64_epi8(vector: __m256i) -> __m256i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm256_set_epi8(
        0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
        0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    );
    _mm256_shuffle_epi8(vector, byte_order)
}
/// Reverses the four 16-bit elements inside every 64-bit group of `vector`.
///
/// AVX2 byte shuffle whose table maps output byte `i` to input byte `i ^ 6`.
///
/// # Safety
///
/// Only compiled when AVX and AVX2 are enabled at build time; the CPU
/// executing the call must support AVX2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx", target_feature = "avx2"))]
#[inline]
pub unsafe fn _mm256_vrev64_epi16(vector: __m256i) -> __m256i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm256_set_epi8(
        0x19, 0x18, 0x1B, 0x1A, 0x1D, 0x1C, 0x1F, 0x1E,
        0x11, 0x10, 0x13, 0x12, 0x15, 0x14, 0x17, 0x16,
        0x09, 0x08, 0x0B, 0x0A, 0x0D, 0x0C, 0x0F, 0x0E,
        0x01, 0x00, 0x03, 0x02, 0x05, 0x04, 0x07, 0x06,
    );
    _mm256_shuffle_epi8(vector, byte_order)
}
/// Swaps the two 32-bit elements inside every 64-bit group of `vector`.
///
/// AVX2 byte shuffle whose table maps output byte `i` to input byte `i ^ 4`.
///
/// # Safety
///
/// Only compiled when AVX and AVX2 are enabled at build time; the CPU
/// executing the call must support AVX2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx", target_feature = "avx2"))]
#[inline]
pub unsafe fn _mm256_vrev64_epi32(vector: __m256i) -> __m256i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm256_set_epi8(
        0x1B, 0x1A, 0x19, 0x18, 0x1F, 0x1E, 0x1D, 0x1C,
        0x13, 0x12, 0x11, 0x10, 0x17, 0x16, 0x15, 0x14,
        0x0B, 0x0A, 0x09, 0x08, 0x0F, 0x0E, 0x0D, 0x0C,
        0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04,
    );
    _mm256_shuffle_epi8(vector, byte_order)
}
/// Reverses the sixteen bytes inside every 128-bit lane of `vector`.
///
/// AVX2 byte shuffle whose table maps output byte `i` to input byte `i ^ 15`.
///
/// # Safety
///
/// Only compiled when AVX and AVX2 are enabled at build time; the CPU
/// executing the call must support AVX2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx", target_feature = "avx2"))]
#[inline]
pub unsafe fn _mm256_vrev128_epi8(vector: __m256i) -> __m256i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm256_set_epi8(
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
        0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    );
    _mm256_shuffle_epi8(vector, byte_order)
}
/// Reverses the eight 16-bit elements inside every 128-bit lane of `vector`.
///
/// AVX2 byte shuffle whose table maps output byte `i` to input byte `i ^ 14`.
///
/// # Safety
///
/// Only compiled when AVX and AVX2 are enabled at build time; the CPU
/// executing the call must support AVX2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx", target_feature = "avx2"))]
#[inline]
pub unsafe fn _mm256_vrev128_epi16(vector: __m256i) -> __m256i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm256_set_epi8(
        0x11, 0x10, 0x13, 0x12, 0x15, 0x14, 0x17, 0x16,
        0x19, 0x18, 0x1B, 0x1A, 0x1D, 0x1C, 0x1F, 0x1E,
        0x01, 0x00, 0x03, 0x02, 0x05, 0x04, 0x07, 0x06,
        0x09, 0x08, 0x0B, 0x0A, 0x0D, 0x0C, 0x0F, 0x0E,
    );
    _mm256_shuffle_epi8(vector, byte_order)
}
/// Reverses the four 32-bit elements inside every 128-bit lane of `vector`.
///
/// AVX2 byte shuffle whose table maps output byte `i` to input byte `i ^ 12`.
///
/// # Safety
///
/// Only compiled when AVX and AVX2 are enabled at build time; the CPU
/// executing the call must support AVX2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx", target_feature = "avx2"))]
#[inline]
pub unsafe fn _mm256_vrev128_epi32(vector: __m256i) -> __m256i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm256_set_epi8(
        0x13, 0x12, 0x11, 0x10, 0x17, 0x16, 0x15, 0x14,
        0x1B, 0x1A, 0x19, 0x18, 0x1F, 0x1E, 0x1D, 0x1C,
        0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04,
        0x0B, 0x0A, 0x09, 0x08, 0x0F, 0x0E, 0x0D, 0x0C,
    );
    _mm256_shuffle_epi8(vector, byte_order)
}
/// Swaps the two 64-bit elements inside every 128-bit lane of `vector`.
///
/// AVX2 byte shuffle whose table maps output byte `i` to input byte `i ^ 8`.
///
/// # Safety
///
/// Only compiled when AVX and AVX2 are enabled at build time; the CPU
/// executing the call must support AVX2.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx", target_feature = "avx2"))]
#[inline]
pub unsafe fn _mm256_vrev128_epi64(vector: __m256i) -> __m256i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm256_set_epi8(
        0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
        0x1F, 0x1E, 0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18,
        0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
        0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08,
    );
    _mm256_shuffle_epi8(vector, byte_order)
}
/// Swaps the two bytes inside every 16-bit element of `vector`.
///
/// AVX-512BW byte shuffle whose table maps output byte `i` to input byte
/// `i ^ 1`.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support both.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev16_epi8(vector: __m512i) -> __m512i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm512_set_epi8(
        0x3E, 0x3F, 0x3C, 0x3D, 0x3A, 0x3B, 0x38, 0x39,
        0x36, 0x37, 0x34, 0x35, 0x32, 0x33, 0x30, 0x31,
        0x2E, 0x2F, 0x2C, 0x2D, 0x2A, 0x2B, 0x28, 0x29,
        0x26, 0x27, 0x24, 0x25, 0x22, 0x23, 0x20, 0x21,
        0x1E, 0x1F, 0x1C, 0x1D, 0x1A, 0x1B, 0x18, 0x19,
        0x16, 0x17, 0x14, 0x15, 0x12, 0x13, 0x10, 0x11,
        0x0E, 0x0F, 0x0C, 0x0D, 0x0A, 0x0B, 0x08, 0x09,
        0x06, 0x07, 0x04, 0x05, 0x02, 0x03, 0x00, 0x01,
    );
    _mm512_shuffle_epi8(vector, byte_order)
}
/// Reverses the four bytes inside every 32-bit element of `vector`.
///
/// AVX-512BW byte shuffle whose table maps output byte `i` to input byte
/// `i ^ 3`.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support both.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev32_epi8(vector: __m512i) -> __m512i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm512_set_epi8(
        0x3C, 0x3D, 0x3E, 0x3F, 0x38, 0x39, 0x3A, 0x3B,
        0x34, 0x35, 0x36, 0x37, 0x30, 0x31, 0x32, 0x33,
        0x2C, 0x2D, 0x2E, 0x2F, 0x28, 0x29, 0x2A, 0x2B,
        0x24, 0x25, 0x26, 0x27, 0x20, 0x21, 0x22, 0x23,
        0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B,
        0x14, 0x15, 0x16, 0x17, 0x10, 0x11, 0x12, 0x13,
        0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B,
        0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
    );
    _mm512_shuffle_epi8(vector, byte_order)
}
/// Swaps the two 16-bit halves inside every 32-bit element of `vector`.
///
/// AVX-512BW byte shuffle whose table maps output byte `i` to input byte
/// `i ^ 2`.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support both.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev32_epi16(vector: __m512i) -> __m512i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm512_set_epi8(
        0x3D, 0x3C, 0x3F, 0x3E, 0x39, 0x38, 0x3B, 0x3A,
        0x35, 0x34, 0x37, 0x36, 0x31, 0x30, 0x33, 0x32,
        0x2D, 0x2C, 0x2F, 0x2E, 0x29, 0x28, 0x2B, 0x2A,
        0x25, 0x24, 0x27, 0x26, 0x21, 0x20, 0x23, 0x22,
        0x1D, 0x1C, 0x1F, 0x1E, 0x19, 0x18, 0x1B, 0x1A,
        0x15, 0x14, 0x17, 0x16, 0x11, 0x10, 0x13, 0x12,
        0x0D, 0x0C, 0x0F, 0x0E, 0x09, 0x08, 0x0B, 0x0A,
        0x05, 0x04, 0x07, 0x06, 0x01, 0x00, 0x03, 0x02,
    );
    _mm512_shuffle_epi8(vector, byte_order)
}
/// Reverses the eight bytes inside every 64-bit element of `vector`.
///
/// AVX-512BW byte shuffle whose table maps output byte `i` to input byte
/// `i ^ 7`.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support both.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev64_epi8(vector: __m512i) -> __m512i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm512_set_epi8(
        0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
        0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
        0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
        0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
        0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    );
    _mm512_shuffle_epi8(vector, byte_order)
}
/// Reverses the four 16-bit elements inside every 64-bit group of `vector`.
///
/// AVX-512BW byte shuffle whose table maps output byte `i` to input byte
/// `i ^ 6`.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support both.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev64_epi16(vector: __m512i) -> __m512i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm512_set_epi8(
        0x39, 0x38, 0x3B, 0x3A, 0x3D, 0x3C, 0x3F, 0x3E,
        0x31, 0x30, 0x33, 0x32, 0x35, 0x34, 0x37, 0x36,
        0x29, 0x28, 0x2B, 0x2A, 0x2D, 0x2C, 0x2F, 0x2E,
        0x21, 0x20, 0x23, 0x22, 0x25, 0x24, 0x27, 0x26,
        0x19, 0x18, 0x1B, 0x1A, 0x1D, 0x1C, 0x1F, 0x1E,
        0x11, 0x10, 0x13, 0x12, 0x15, 0x14, 0x17, 0x16,
        0x09, 0x08, 0x0B, 0x0A, 0x0D, 0x0C, 0x0F, 0x0E,
        0x01, 0x00, 0x03, 0x02, 0x05, 0x04, 0x07, 0x06,
    );
    _mm512_shuffle_epi8(vector, byte_order)
}
/// Swaps the two 32-bit elements inside every 64-bit group of `vector`.
///
/// AVX-512BW byte shuffle whose table maps output byte `i` to input byte
/// `i ^ 4`.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support both.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev64_epi32(vector: __m512i) -> __m512i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm512_set_epi8(
        0x3B, 0x3A, 0x39, 0x38, 0x3F, 0x3E, 0x3D, 0x3C,
        0x33, 0x32, 0x31, 0x30, 0x37, 0x36, 0x35, 0x34,
        0x2B, 0x2A, 0x29, 0x28, 0x2F, 0x2E, 0x2D, 0x2C,
        0x23, 0x22, 0x21, 0x20, 0x27, 0x26, 0x25, 0x24,
        0x1B, 0x1A, 0x19, 0x18, 0x1F, 0x1E, 0x1D, 0x1C,
        0x13, 0x12, 0x11, 0x10, 0x17, 0x16, 0x15, 0x14,
        0x0B, 0x0A, 0x09, 0x08, 0x0F, 0x0E, 0x0D, 0x0C,
        0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04,
    );
    _mm512_shuffle_epi8(vector, byte_order)
}
/// Reverses the sixteen bytes inside every 128-bit lane of `vector`.
///
/// AVX-512BW byte shuffle whose table maps output byte `i` to input byte
/// `i ^ 15`.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support both.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev128_epi8(vector: __m512i) -> __m512i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm512_set_epi8(
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
        0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
        0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
        0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
        0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    );
    _mm512_shuffle_epi8(vector, byte_order)
}
/// Reverses the eight 16-bit elements inside every 128-bit lane of `vector`.
///
/// AVX-512BW byte shuffle whose table maps output byte `i` to input byte
/// `i ^ 14`.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support both.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev128_epi16(vector: __m512i) -> __m512i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm512_set_epi8(
        0x31, 0x30, 0x33, 0x32, 0x35, 0x34, 0x37, 0x36,
        0x39, 0x38, 0x3B, 0x3A, 0x3D, 0x3C, 0x3F, 0x3E,
        0x21, 0x20, 0x23, 0x22, 0x25, 0x24, 0x27, 0x26,
        0x29, 0x28, 0x2B, 0x2A, 0x2D, 0x2C, 0x2F, 0x2E,
        0x11, 0x10, 0x13, 0x12, 0x15, 0x14, 0x17, 0x16,
        0x19, 0x18, 0x1B, 0x1A, 0x1D, 0x1C, 0x1F, 0x1E,
        0x01, 0x00, 0x03, 0x02, 0x05, 0x04, 0x07, 0x06,
        0x09, 0x08, 0x0B, 0x0A, 0x0D, 0x0C, 0x0F, 0x0E,
    );
    _mm512_shuffle_epi8(vector, byte_order)
}
/// Reverses the four 32-bit elements inside every 128-bit lane of `vector`.
///
/// AVX-512BW byte shuffle whose table maps output byte `i` to input byte
/// `i ^ 12`.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support both.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev128_epi32(vector: __m512i) -> __m512i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm512_set_epi8(
        0x33, 0x32, 0x31, 0x30, 0x37, 0x36, 0x35, 0x34,
        0x3B, 0x3A, 0x39, 0x38, 0x3F, 0x3E, 0x3D, 0x3C,
        0x23, 0x22, 0x21, 0x20, 0x27, 0x26, 0x25, 0x24,
        0x2B, 0x2A, 0x29, 0x28, 0x2F, 0x2E, 0x2D, 0x2C,
        0x13, 0x12, 0x11, 0x10, 0x17, 0x16, 0x15, 0x14,
        0x1B, 0x1A, 0x19, 0x18, 0x1F, 0x1E, 0x1D, 0x1C,
        0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04,
        0x0B, 0x0A, 0x09, 0x08, 0x0F, 0x0E, 0x0D, 0x0C,
    );
    _mm512_shuffle_epi8(vector, byte_order)
}
/// Swaps the two 64-bit elements inside every 128-bit lane of `vector`.
///
/// AVX-512BW byte shuffle whose table maps output byte `i` to input byte
/// `i ^ 8`.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support both.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev128_epi64(vector: __m512i) -> __m512i {
    // Indices are written as absolute byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let byte_order = _mm512_set_epi8(
        0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30,
        0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38,
        0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20,
        0x2F, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28,
        0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
        0x1F, 0x1E, 0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18,
        0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
        0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08,
    );
    _mm512_shuffle_epi8(vector, byte_order)
}
/// Reverses the thirty-two bytes inside every 256-bit half of `vector`.
///
/// Done in two steps: a byte shuffle reverses the bytes inside each 128-bit
/// lane, then a 64-bit permute swaps the two lanes of each 256-bit half. The
/// net effect is that output byte `i` reads input byte `i ^ 31`.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support both.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev256_epi8(vector: __m512i) -> __m512i {
    // Indices are written as final byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let in_lane_order = _mm512_set_epi8(
        0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
        0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
        0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
        0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    );
    let reversed_in_lanes = _mm512_shuffle_epi8(vector, in_lane_order);
    // Qword pairs (0,1)<->(2,3) and (4,5)<->(6,7): swaps the 128-bit lanes of
    // each 256-bit half.
    let lane_swap = _mm512_set_epi64(5, 4, 7, 6, 1, 0, 3, 2);
    _mm512_permutexvar_epi64(lane_swap, reversed_in_lanes)
}
/// Reverses the sixteen 16-bit elements inside every 256-bit half of `vector`.
///
/// Done in two steps: a byte shuffle reverses the 16-bit elements inside each
/// 128-bit lane, then a 64-bit permute swaps the two lanes of each 256-bit
/// half. The net effect is that output byte `i` reads input byte `i ^ 30`.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support both.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev256_epi16(vector: __m512i) -> __m512i {
    // Indices are written as final byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let in_lane_order = _mm512_set_epi8(
        0x21, 0x20, 0x23, 0x22, 0x25, 0x24, 0x27, 0x26,
        0x29, 0x28, 0x2B, 0x2A, 0x2D, 0x2C, 0x2F, 0x2E,
        0x31, 0x30, 0x33, 0x32, 0x35, 0x34, 0x37, 0x36,
        0x39, 0x38, 0x3B, 0x3A, 0x3D, 0x3C, 0x3F, 0x3E,
        0x01, 0x00, 0x03, 0x02, 0x05, 0x04, 0x07, 0x06,
        0x09, 0x08, 0x0B, 0x0A, 0x0D, 0x0C, 0x0F, 0x0E,
        0x11, 0x10, 0x13, 0x12, 0x15, 0x14, 0x17, 0x16,
        0x19, 0x18, 0x1B, 0x1A, 0x1D, 0x1C, 0x1F, 0x1E,
    );
    let reversed_in_lanes = _mm512_shuffle_epi8(vector, in_lane_order);
    // Qword pairs (0,1)<->(2,3) and (4,5)<->(6,7): swaps the 128-bit lanes of
    // each 256-bit half.
    let lane_swap = _mm512_set_epi64(5, 4, 7, 6, 1, 0, 3, 2);
    _mm512_permutexvar_epi64(lane_swap, reversed_in_lanes)
}
/// Reverses the eight 32-bit elements inside every 256-bit half of `vector`.
///
/// Done in two steps: a byte shuffle reverses the 32-bit elements inside each
/// 128-bit lane, then a 64-bit permute swaps the two lanes of each 256-bit
/// half. The net effect is that output byte `i` reads input byte `i ^ 28`.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support both.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev256_epi32(vector: __m512i) -> __m512i {
    // Indices are written as final byte positions, highest byte first; the
    // shuffle works per 128-bit lane and only reads the low four bits of each.
    let in_lane_order = _mm512_set_epi8(
        0x23, 0x22, 0x21, 0x20, 0x27, 0x26, 0x25, 0x24,
        0x2B, 0x2A, 0x29, 0x28, 0x2F, 0x2E, 0x2D, 0x2C,
        0x33, 0x32, 0x31, 0x30, 0x37, 0x36, 0x35, 0x34,
        0x3B, 0x3A, 0x39, 0x38, 0x3F, 0x3E, 0x3D, 0x3C,
        0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04,
        0x0B, 0x0A, 0x09, 0x08, 0x0F, 0x0E, 0x0D, 0x0C,
        0x13, 0x12, 0x11, 0x10, 0x17, 0x16, 0x15, 0x14,
        0x1B, 0x1A, 0x19, 0x18, 0x1F, 0x1E, 0x1D, 0x1C,
    );
    let reversed_in_lanes = _mm512_shuffle_epi8(vector, in_lane_order);
    // Qword pairs (0,1)<->(2,3) and (4,5)<->(6,7): swaps the 128-bit lanes of
    // each 256-bit half.
    let lane_swap = _mm512_set_epi64(5, 4, 7, 6, 1, 0, 3, 2);
    _mm512_permutexvar_epi64(lane_swap, reversed_in_lanes)
}
/// Reverses the four 64-bit elements inside every 256-bit half of `vector`.
///
/// Output qword `i` reads input qword `i ^ 3`. Because the elements being
/// moved are whole qwords, one cross-lane 64-bit permute performs the entire
/// reversal; no separate in-lane byte shuffle is needed.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support AVX-512F.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev256_epi64(vector: __m512i) -> __m512i {
    // `_mm512_set_epi64` lists elements from the highest (7) down to the
    // lowest (0), so this reads as: out[7]=in[4], ..., out[4]=in[7],
    // out[3]=in[0], ..., out[0]=in[3].
    let qword_order = _mm512_set_epi64(4, 5, 6, 7, 0, 1, 2, 3);
    _mm512_permutexvar_epi64(qword_order, vector)
}
/// Swaps the two 128-bit lanes inside every 256-bit half of `vector`.
///
/// Output byte `i` reads input byte `i ^ 16`. The bytes inside each lane keep
/// their order, so a single 64-bit permute that exchanges qword pairs is all
/// that is required; the previous in-lane byte shuffle was an identity.
///
/// # Safety
///
/// Only compiled when AVX-512F and AVX-512BW are enabled at build time; the
/// CPU executing the call must support AVX-512F.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
#[inline]
pub unsafe fn _mm512_vrev256_epi128(vector: __m512i) -> __m512i {
    // `_mm512_set_epi64` lists elements from the highest (7) down to the
    // lowest (0): qword pairs (0,1)<->(2,3) and (4,5)<->(6,7) trade places.
    let lane_swap = _mm512_set_epi64(5, 4, 7, 6, 1, 0, 3, 2);
    _mm512_permutexvar_epi64(lane_swap, vector)
}