#[cfg(all(target_arch = "x86", target_feature = "sse2"))]
use core::{
arch::{
x86::{
__m128i,
_mm_set1_epi8,
_mm_and_si128,
_mm_slli_epi16, _mm_srli_epi16,
_mm_setzero_si128
}
}
};
#[cfg(all(target_arch = "x86", target_feature = "avx", target_feature = "avx2"))]
use core::{
arch::{
x86::{
__m256i,
_mm256_set1_epi8,
_mm256_setr_epi8,
_mm256_and_si256,
_mm256_slli_epi16, _mm256_srli_epi16,
_mm256_alignr_epi8,
_mm256_permute2x128_si256,
_mm256_setzero_si256
}
}
};
#[cfg(all(target_arch = "x86", target_feature = "avx512f", target_feature = "avx512bw"))]
use core::{
arch::{
x86::{
__m512i,
_mm512_set1_epi8,
_mm512_set_epi64,
_mm512_or_si512, _mm512_and_si512,
_mm512_slli_epi16, _mm512_srli_epi16,
_mm512_bslli_epi128, _mm512_bsrli_epi128,
_mm512_permutexvar_epi64,
_mm512_setzero_si512
}
}
};
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
use core::{
arch::{
x86_64::{
__m128i,
_mm_set1_epi8,
_mm_and_si128,
_mm_slli_epi16, _mm_srli_epi16,
_mm_setzero_si128
}
}
};
#[cfg(all(target_arch = "x86_64", target_feature = "avx", target_feature = "avx2"))]
use core::{
arch::{
x86_64::{
__m256i,
_mm256_set1_epi8,
_mm256_setr_epi8,
_mm256_and_si256,
_mm256_slli_epi16, _mm256_srli_epi16,
_mm256_alignr_epi8,
_mm256_permute2x128_si256,
_mm256_setzero_si256
}
}
};
#[cfg(all(target_arch = "x86_64", target_feature = "avx512f", target_feature = "avx512bw"))]
use core::{
arch::{
x86_64::{
__m512i,
_mm512_set1_epi8,
_mm512_set_epi64,
_mm512_or_si512, _mm512_and_si512,
_mm512_slli_epi16, _mm512_srli_epi16,
_mm512_bslli_epi128, _mm512_bsrli_epi128,
_mm512_permutexvar_epi64,
_mm512_setzero_si512
}
}
};
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2"))]
/// Emulated per-byte logical left shift for `__m128i` (SSE2 has no native
/// 8-bit shift). Each of the 16 byte lanes is shifted left by `shift` bits;
/// a `shift` of 8 or more yields an all-zero vector.
pub unsafe fn _mm_slli_epi8(vector: __m128i, shift: usize) -> __m128i {
    // Identity and saturation cases first, so the match below only sees 1..=7.
    if shift == 0x00 {
        return vector;
    }
    if shift > 0x07 {
        return _mm_setzero_si128();
    }
    // Drop the top `shift` bits of every byte so the 16-bit shift below
    // cannot carry bits from the low byte into the high byte of each word.
    let kept = _mm_and_si128(vector, _mm_set1_epi8((0xFFu8 >> shift) as i8));
    match shift {
        0x01 => _mm_slli_epi16::<0x01>(kept),
        0x02 => _mm_slli_epi16::<0x02>(kept),
        0x03 => _mm_slli_epi16::<0x03>(kept),
        0x04 => _mm_slli_epi16::<0x04>(kept),
        0x05 => _mm_slli_epi16::<0x05>(kept),
        0x06 => _mm_slli_epi16::<0x06>(kept),
        // The guards above guarantee the only remaining value is 0x07.
        _ => _mm_slli_epi16::<0x07>(kept),
    }
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2"))]
/// Emulated per-byte logical right shift for `__m128i` (SSE2 has no native
/// 8-bit shift). Each of the 16 byte lanes is shifted right by `shift` bits;
/// a `shift` of 8 or more yields an all-zero vector.
pub unsafe fn _mm_srli_epi8(vector: __m128i, shift: usize) -> __m128i {
    // Identity and saturation cases first, so the match below only sees 1..=7.
    if shift == 0x00 {
        return vector;
    }
    if shift > 0x07 {
        return _mm_setzero_si128();
    }
    // Drop the low `shift` bits of every byte so the 16-bit shift below
    // cannot carry bits from the high byte into the low byte of each word.
    // 0xFF << shift is 0xFE, 0xFC, ... 0x80 reinterpreted as i8.
    let kept = _mm_and_si128(vector, _mm_set1_epi8((0xFFu8 << shift) as i8));
    match shift {
        0x01 => _mm_srli_epi16::<0x01>(kept),
        0x02 => _mm_srli_epi16::<0x02>(kept),
        0x03 => _mm_srli_epi16::<0x03>(kept),
        0x04 => _mm_srli_epi16::<0x04>(kept),
        0x05 => _mm_srli_epi16::<0x05>(kept),
        0x06 => _mm_srli_epi16::<0x06>(kept),
        // The guards above guarantee the only remaining value is 0x07.
        _ => _mm_srli_epi16::<0x07>(kept),
    }
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx", target_feature = "avx2"))]
/// Emulated per-byte logical left shift for `__m256i` (AVX2 has no native
/// 8-bit shift). Each of the 32 byte lanes is shifted left by `shift` bits;
/// a `shift` of 8 or more yields an all-zero vector.
pub unsafe fn _mm256_slli_epi8(vector: __m256i, shift: usize) -> __m256i {
    // Identity and saturation cases first, so the match below only sees 1..=7.
    if shift == 0x00 {
        return vector;
    }
    if shift > 0x07 {
        return _mm256_setzero_si256();
    }
    // Drop the top `shift` bits of every byte so the 16-bit shift below
    // cannot carry bits from the low byte into the high byte of each word.
    let kept = _mm256_and_si256(vector, _mm256_set1_epi8((0xFFu8 >> shift) as i8));
    match shift {
        0x01 => _mm256_slli_epi16::<0x01>(kept),
        0x02 => _mm256_slli_epi16::<0x02>(kept),
        0x03 => _mm256_slli_epi16::<0x03>(kept),
        0x04 => _mm256_slli_epi16::<0x04>(kept),
        0x05 => _mm256_slli_epi16::<0x05>(kept),
        0x06 => _mm256_slli_epi16::<0x06>(kept),
        // The guards above guarantee the only remaining value is 0x07.
        _ => _mm256_slli_epi16::<0x07>(kept),
    }
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx", target_feature = "avx2"))]
/// Emulated per-byte logical right shift for `__m256i` (AVX2 has no native
/// 8-bit shift). Each of the 32 byte lanes is shifted right by `shift` bits;
/// a `shift` of 8 or more yields an all-zero vector.
pub unsafe fn _mm256_srli_epi8(vector: __m256i, shift: usize) -> __m256i {
    // Identity and saturation cases first, so the match below only sees 1..=7.
    if shift == 0x00 {
        return vector;
    }
    if shift > 0x07 {
        return _mm256_setzero_si256();
    }
    // Drop the low `shift` bits of every byte so the 16-bit shift below
    // cannot carry bits from the high byte into the low byte of each word.
    // 0xFF << shift is 0xFE, 0xFC, ... 0x80 reinterpreted as i8.
    let kept = _mm256_and_si256(vector, _mm256_set1_epi8((0xFFu8 << shift) as i8));
    match shift {
        0x01 => _mm256_srli_epi16::<0x01>(kept),
        0x02 => _mm256_srli_epi16::<0x02>(kept),
        0x03 => _mm256_srli_epi16::<0x03>(kept),
        0x04 => _mm256_srli_epi16::<0x04>(kept),
        0x05 => _mm256_srli_epi16::<0x05>(kept),
        0x06 => _mm256_srli_epi16::<0x06>(kept),
        // The guards above guarantee the only remaining value is 0x07.
        _ => _mm256_srli_epi16::<0x07>(kept),
    }
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx", target_feature = "avx2"))]
/// Shifts the entire 256-bit `vector` left by `shift` *bytes*, treating it as
/// one contiguous 256-bit integer; `shift >= 32` yields an all-zero vector.
///
/// NOTE(review): the core intrinsic with this same name shifts each 128-bit
/// lane independently; this helper instead carries bytes across the lane
/// boundary — confirm callers expect the full-width behaviour.
///
/// Per arm: `_mm256_permute2x128_si256::<0x01>` swaps the two 128-bit lanes so
/// that `_mm256_alignr_epi8` can stitch each lane together with its neighbour,
/// and the trailing `_mm256_and_si256` mask zeroes the low `shift` bytes that
/// the lane swap wrapped around from the top of the vector.
pub unsafe fn _mm256_slli_si256(vector: __m256i, shift: usize) -> __m256i {
return match shift {
0x00 => vector,
// Byte shifts 1..=15: align `vector` against its lane-swapped copy with
// imm = 16 - shift, then mask off the wrapped-around low bytes.
0x01 => _mm256_and_si256(_mm256_alignr_epi8::<0x0F>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x02 => _mm256_and_si256(_mm256_alignr_epi8::<0x0E>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x03 => _mm256_and_si256(_mm256_alignr_epi8::<0x0D>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x04 => _mm256_and_si256(_mm256_alignr_epi8::<0x0C>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x05 => _mm256_and_si256(_mm256_alignr_epi8::<0x0B>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x06 => _mm256_and_si256(_mm256_alignr_epi8::<0x0A>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x07 => _mm256_and_si256(_mm256_alignr_epi8::<0x09>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x08 => _mm256_and_si256(_mm256_alignr_epi8::<0x08>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x09 => _mm256_and_si256(_mm256_alignr_epi8::<0x07>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x0A => _mm256_and_si256(_mm256_alignr_epi8::<0x06>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x0B => _mm256_and_si256(_mm256_alignr_epi8::<0x05>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x0C => _mm256_and_si256(_mm256_alignr_epi8::<0x04>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x0D => _mm256_and_si256(_mm256_alignr_epi8::<0x03>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x0E => _mm256_and_si256(_mm256_alignr_epi8::<0x02>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x0F => _mm256_and_si256(_mm256_alignr_epi8::<0x01>(vector, _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
// Byte shifts 16..=31: the low lane is entirely shifted out, so align the
// lane-swapped copy against a zero vector (imm = 32 - shift) instead.
0x10 => _mm256_and_si256(_mm256_alignr_epi8::<0x10>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x11 => _mm256_and_si256(_mm256_alignr_epi8::<0x0F>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x12 => _mm256_and_si256(_mm256_alignr_epi8::<0x0E>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x13 => _mm256_and_si256(_mm256_alignr_epi8::<0x0D>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x14 => _mm256_and_si256(_mm256_alignr_epi8::<0x0C>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x15 => _mm256_and_si256(_mm256_alignr_epi8::<0x0B>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x16 => _mm256_and_si256(_mm256_alignr_epi8::<0x0A>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x17 => _mm256_and_si256(_mm256_alignr_epi8::<0x09>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1)),
0x18 => _mm256_and_si256(_mm256_alignr_epi8::<0x08>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1)),
0x19 => _mm256_and_si256(_mm256_alignr_epi8::<0x07>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1)),
0x1A => _mm256_and_si256(_mm256_alignr_epi8::<0x06>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1)),
0x1B => _mm256_and_si256(_mm256_alignr_epi8::<0x05>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1)),
0x1C => _mm256_and_si256(_mm256_alignr_epi8::<0x04>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1)),
0x1D => _mm256_and_si256(_mm256_alignr_epi8::<0x03>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1)),
0x1E => _mm256_and_si256(_mm256_alignr_epi8::<0x02>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1)),
0x1F => _mm256_and_si256(_mm256_alignr_epi8::<0x01>(_mm256_permute2x128_si256::<0x01>(vector, vector), _mm256_setzero_si256()), _mm256_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1)),
// 32 bytes or more: every byte is shifted out.
_ => _mm256_setzero_si256()
};
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx", target_feature = "avx2"))]
/// Shifts the entire 256-bit `vector` right by `shift` *bytes*, treating it as
/// one contiguous 256-bit integer; `shift >= 32` yields an all-zero vector.
///
/// NOTE(review): the core intrinsic with this same name shifts each 128-bit
/// lane independently; this helper instead carries bytes across the lane
/// boundary — confirm callers expect the full-width behaviour.
///
/// Per arm: `_mm256_permute2x128_si256::<0x01>` swaps the two 128-bit lanes so
/// that `_mm256_alignr_epi8` can stitch each lane together with its neighbour,
/// and the trailing `_mm256_and_si256` mask zeroes the high `shift` bytes that
/// the lane swap wrapped around from the bottom of the vector.
pub unsafe fn _mm256_srli_si256(vector: __m256i, shift: usize) -> __m256i {
return match shift {
0x00 => vector,
// Byte shifts 1..=16: align the lane-swapped copy against `vector` with
// imm = shift, then mask off the wrapped-around high bytes.
0x01 => _mm256_and_si256(_mm256_alignr_epi8::<0x01>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0)),
0x02 => _mm256_and_si256(_mm256_alignr_epi8::<0x02>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0)),
0x03 => _mm256_and_si256(_mm256_alignr_epi8::<0x03>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0)),
0x04 => _mm256_and_si256(_mm256_alignr_epi8::<0x04>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0)),
0x05 => _mm256_and_si256(_mm256_alignr_epi8::<0x05>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0)),
0x06 => _mm256_and_si256(_mm256_alignr_epi8::<0x06>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0)),
0x07 => _mm256_and_si256(_mm256_alignr_epi8::<0x07>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0)),
0x08 => _mm256_and_si256(_mm256_alignr_epi8::<0x08>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0)),
0x09 => _mm256_and_si256(_mm256_alignr_epi8::<0x09>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x0A => _mm256_and_si256(_mm256_alignr_epi8::<0x0A>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x0B => _mm256_and_si256(_mm256_alignr_epi8::<0x0B>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x0C => _mm256_and_si256(_mm256_alignr_epi8::<0x0C>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x0D => _mm256_and_si256(_mm256_alignr_epi8::<0x0D>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x0E => _mm256_and_si256(_mm256_alignr_epi8::<0x0E>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x0F => _mm256_and_si256(_mm256_alignr_epi8::<0x0F>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x10 => _mm256_and_si256(_mm256_alignr_epi8::<0x10>(_mm256_permute2x128_si256::<0x01>(vector, vector), vector), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
// Byte shifts 17..=31: the high lane is entirely shifted out, so align a
// zero vector against the lane-swapped copy (imm = shift - 16) instead.
0x11 => _mm256_and_si256(_mm256_alignr_epi8::<0x01>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x12 => _mm256_and_si256(_mm256_alignr_epi8::<0x02>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x13 => _mm256_and_si256(_mm256_alignr_epi8::<0x03>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x14 => _mm256_and_si256(_mm256_alignr_epi8::<0x04>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x15 => _mm256_and_si256(_mm256_alignr_epi8::<0x05>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x16 => _mm256_and_si256(_mm256_alignr_epi8::<0x06>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x17 => _mm256_and_si256(_mm256_alignr_epi8::<0x07>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x18 => _mm256_and_si256(_mm256_alignr_epi8::<0x08>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x19 => _mm256_and_si256(_mm256_alignr_epi8::<0x09>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x1A => _mm256_and_si256(_mm256_alignr_epi8::<0x0A>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x1B => _mm256_and_si256(_mm256_alignr_epi8::<0x0B>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x1C => _mm256_and_si256(_mm256_alignr_epi8::<0x0C>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x1D => _mm256_and_si256(_mm256_alignr_epi8::<0x0D>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x1E => _mm256_and_si256(_mm256_alignr_epi8::<0x0E>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
0x1F => _mm256_and_si256(_mm256_alignr_epi8::<0x0F>(_mm256_setzero_si256(), _mm256_permute2x128_si256::<0x01>(vector, vector)), _mm256_setr_epi8(-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
// 32 bytes or more: every byte is shifted out.
_ => _mm256_setzero_si256()
};
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
/// Emulated per-byte logical left shift for `__m512i` (AVX-512BW has no native
/// 8-bit shift). Each of the 64 byte lanes is shifted left by `shift` bits;
/// a `shift` of 8 or more yields an all-zero vector.
pub unsafe fn _mm512_slli_epi8(vector: __m512i, shift: usize) -> __m512i {
    // Identity and saturation cases first, so the match below only sees 1..=7.
    if shift == 0x00 {
        return vector;
    }
    if shift > 0x07 {
        return _mm512_setzero_si512();
    }
    // Drop the top `shift` bits of every byte so the 16-bit shift below
    // cannot carry bits from the low byte into the high byte of each word.
    let kept = _mm512_and_si512(vector, _mm512_set1_epi8((0xFFu8 >> shift) as i8));
    match shift {
        0x01 => _mm512_slli_epi16::<0x01>(kept),
        0x02 => _mm512_slli_epi16::<0x02>(kept),
        0x03 => _mm512_slli_epi16::<0x03>(kept),
        0x04 => _mm512_slli_epi16::<0x04>(kept),
        0x05 => _mm512_slli_epi16::<0x05>(kept),
        0x06 => _mm512_slli_epi16::<0x06>(kept),
        // The guards above guarantee the only remaining value is 0x07.
        _ => _mm512_slli_epi16::<0x07>(kept),
    }
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
/// Emulated per-byte logical right shift for `__m512i` (AVX-512BW has no
/// native 8-bit shift). Each of the 64 byte lanes is shifted right by `shift`
/// bits; a `shift` of 8 or more yields an all-zero vector.
pub unsafe fn _mm512_srli_epi8(vector: __m512i, shift: usize) -> __m512i {
    // Identity and saturation cases first, so the match below only sees 1..=7.
    if shift == 0x00 {
        return vector;
    }
    if shift > 0x07 {
        return _mm512_setzero_si512();
    }
    // Drop the low `shift` bits of every byte so the 16-bit shift below
    // cannot carry bits from the high byte into the low byte of each word.
    // 0xFF << shift is 0xFE, 0xFC, ... 0x80 reinterpreted as i8.
    let kept = _mm512_and_si512(vector, _mm512_set1_epi8((0xFFu8 << shift) as i8));
    match shift {
        0x01 => _mm512_srli_epi16::<0x01>(kept),
        0x02 => _mm512_srli_epi16::<0x02>(kept),
        0x03 => _mm512_srli_epi16::<0x03>(kept),
        0x04 => _mm512_srli_epi16::<0x04>(kept),
        0x05 => _mm512_srli_epi16::<0x05>(kept),
        0x06 => _mm512_srli_epi16::<0x06>(kept),
        // The guards above guarantee the only remaining value is 0x07.
        _ => _mm512_srli_epi16::<0x07>(kept),
    }
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
pub unsafe fn _mm512_slli_si512(vector: __m512i, shift: usize) -> __m512i {
return match shift {
0x00 => vector,
0x01 => _mm512_or_si512(_mm512_bslli_epi128::<0x01>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x0F>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x02 => _mm512_or_si512(_mm512_bslli_epi128::<0x02>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x0E>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x03 => _mm512_or_si512(_mm512_bslli_epi128::<0x03>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x0D>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x04 => _mm512_or_si512(_mm512_bslli_epi128::<0x04>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x0C>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x05 => _mm512_or_si512(_mm512_bslli_epi128::<0x05>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x0B>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x06 => _mm512_or_si512(_mm512_bslli_epi128::<0x06>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x0A>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x07 => _mm512_or_si512(_mm512_bslli_epi128::<0x07>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x09>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x08 => _mm512_or_si512(_mm512_bslli_epi128::<0x08>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x08>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x09 => _mm512_or_si512(_mm512_bslli_epi128::<0x09>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x07>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x0A => _mm512_or_si512(_mm512_bslli_epi128::<0x0A>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x06>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x0B => _mm512_or_si512(_mm512_bslli_epi128::<0x0B>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x05>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x0C => _mm512_or_si512(_mm512_bslli_epi128::<0x0C>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x04>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x0D => _mm512_or_si512(_mm512_bslli_epi128::<0x0D>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x03>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x0E => _mm512_or_si512(_mm512_bslli_epi128::<0x0E>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x02>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x0F => _mm512_or_si512(_mm512_bslli_epi128::<0x0F>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x01>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0))),
0x10 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), vector), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)),
0x11 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x01>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x0F>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x12 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x02>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x0E>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x13 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x03>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x0D>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x14 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x04>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x0C>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x15 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x05>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x0B>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x16 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x06>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x0A>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x17 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x07>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x09>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x18 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x08>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x08>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x19 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x09>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x07>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x1A => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x0A>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x06>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x1B => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x0B>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x05>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x1C => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x0C>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x04>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x1D => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x0D>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x03>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x1E => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x0E>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x02>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x1F => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x0F>(vector)), _mm512_set_epi64(-1, -1, -1, -1, -1, -1, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x01>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0))),
0x20 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), vector), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)),
0x21 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x01>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x0F>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x22 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x02>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x0E>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x23 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x03>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x0D>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x24 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x04>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x0C>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x25 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x05>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x0B>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x26 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x06>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x0A>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x27 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x07>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x09>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x28 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x08>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x08>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x29 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x09>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x07>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x2A => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x0A>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x06>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x2B => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x0B>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x05>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x2C => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x0C>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x04>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x2D => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x0D>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x03>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x2E => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x0E>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x02>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x2F => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x0F>(vector)), _mm512_set_epi64(-1, -1, -1, -1, 0, 0, 0, 0)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x01>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0))),
0x30 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), vector), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)),
0x31 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x01>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
0x32 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x02>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
0x33 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x03>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
0x34 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x04>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
0x35 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x05>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
0x36 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x06>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
0x37 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x07>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
0x38 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x08>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
0x39 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x09>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
0x3A => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x0A>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
0x3B => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x0B>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
0x3C => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x0C>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
0x3D => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x0D>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
0x3E => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x0E>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
0x3F => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x0F>(vector)), _mm512_set_epi64(-1, -1, 0, 0, 0, 0, 0, 0)), _mm512_setzero_si512()),
_ => _mm512_setzero_si512()
};
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx512f", target_feature = "avx512bw"))]
/// Logically shifts the entire 512-bit `vector` right by `shift` **bytes**,
/// shifting zeros in at the high end. Any `shift >= 0x40` yields all zeros.
///
/// `_mm512_bsrli_epi128`/`_mm512_bslli_epi128` only shift within each
/// independent 128-bit lane, so bytes crossing a lane boundary must be carried
/// over separately: `_mm512_permutexvar_epi64` with an index vector of the form
/// `(1, 0, 7, 6, 5, 4, 3, 2)` / `(3, 2, 1, 0, 7, 6, 5, 4)` / `(5, 4, 3, 2, 1, 0, 7, 6)`
/// rotates the eight 64-bit qwords down by 2 / 4 / 6 positions (i.e. moves the
/// register contents down by 1 / 2 / 3 whole 128-bit lanes), and the
/// `_mm512_set_epi64` masks of `-1`/`0` zero out the qwords that wrapped
/// around, implementing the zero fill. The shift count cannot be a runtime
/// value for the `::<N>` const-generic intrinsics, hence the 64-arm match.
pub unsafe fn _mm512_srli_si512(vector: __m512i, shift: usize) -> __m512i {
match shift {
// Shifts 0x00-0x0F: within-lane shift, plus the low byte(s) of the next
// higher lane carried down by a one-lane (2-qword) rotate; top qword pair
// is masked to zero.
0x00 => vector,
0x01 => _mm512_or_si512(_mm512_bsrli_epi128::<0x01>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x0F>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
0x02 => _mm512_or_si512(_mm512_bsrli_epi128::<0x02>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x0E>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
0x03 => _mm512_or_si512(_mm512_bsrli_epi128::<0x03>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x0D>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
0x04 => _mm512_or_si512(_mm512_bsrli_epi128::<0x04>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x0C>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
0x05 => _mm512_or_si512(_mm512_bsrli_epi128::<0x05>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x0B>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
0x06 => _mm512_or_si512(_mm512_bsrli_epi128::<0x06>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x0A>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
0x07 => _mm512_or_si512(_mm512_bsrli_epi128::<0x07>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x09>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
0x08 => _mm512_or_si512(_mm512_bsrli_epi128::<0x08>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x08>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
0x09 => _mm512_or_si512(_mm512_bsrli_epi128::<0x09>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x07>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
0x0A => _mm512_or_si512(_mm512_bsrli_epi128::<0x0A>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x06>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
0x0B => _mm512_or_si512(_mm512_bsrli_epi128::<0x0B>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x05>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
0x0C => _mm512_or_si512(_mm512_bsrli_epi128::<0x0C>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x04>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
0x0D => _mm512_or_si512(_mm512_bsrli_epi128::<0x0D>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x03>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
0x0E => _mm512_or_si512(_mm512_bsrli_epi128::<0x0E>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x02>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
0x0F => _mm512_or_si512(_mm512_bsrli_epi128::<0x0F>(vector), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bslli_epi128::<0x01>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1))),
// Shift by exactly one lane (0x10): pure 2-qword rotate plus zero mask.
0x10 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), vector), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)),
// Shifts 0x11-0x1F: one-lane rotate of the within-lane-shifted value, with
// the lane-crossing bytes carried by a two-lane (4-qword) rotate.
0x11 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x01>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x0F>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
0x12 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x02>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x0E>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
0x13 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x03>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x0D>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
0x14 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x04>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x0C>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
0x15 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x05>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x0B>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
0x16 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x06>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x0A>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
0x17 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x07>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x09>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
0x18 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x08>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x08>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
0x19 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x09>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x07>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
0x1A => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x0A>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x06>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
0x1B => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x0B>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x05>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
0x1C => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x0C>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x04>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
0x1D => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x0D>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x03>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
0x1E => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x0E>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x02>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
0x1F => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(1, 0, 7, 6, 5, 4, 3, 2), _mm512_bsrli_epi128::<0x0F>(vector)), _mm512_set_epi64(0, 0, -1, -1, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bslli_epi128::<0x01>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1))),
// Shift by exactly two lanes (0x20): pure 4-qword rotate plus zero mask.
0x20 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), vector), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)),
// Shifts 0x21-0x2F: two-lane rotate of the within-lane-shifted value, with
// the lane-crossing bytes carried by a three-lane (6-qword) rotate.
0x21 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x01>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x0F>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
0x22 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x02>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x0E>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
0x23 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x03>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x0D>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
0x24 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x04>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x0C>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
0x25 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x05>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x0B>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
0x26 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x06>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x0A>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
0x27 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x07>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x09>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
0x28 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x08>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x08>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
0x29 => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x09>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x07>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
0x2A => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x0A>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x06>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
0x2B => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x0B>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x05>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
0x2C => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x0C>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x04>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
0x2D => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x0D>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x03>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
0x2E => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x0E>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x02>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
0x2F => _mm512_or_si512(_mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(3, 2, 1, 0, 7, 6, 5, 4), _mm512_bsrli_epi128::<0x0F>(vector)), _mm512_set_epi64(0, 0, 0, 0, -1, -1, -1, -1)), _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bslli_epi128::<0x01>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1))),
// Shift by exactly three lanes (0x30): pure 6-qword rotate plus zero mask.
0x30 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), vector), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
// Shifts 0x31-0x3F: only the top lane survives, rotated into lane 0; there
// is no higher lane to carry bytes from, so no second OR operand is needed.
// (The original wrapped these arms in a redundant OR with zero.)
0x31 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x01>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
0x32 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x02>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
0x33 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x03>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
0x34 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x04>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
0x35 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x05>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
0x36 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x06>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
0x37 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x07>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
0x38 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x08>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
0x39 => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x09>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
0x3A => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x0A>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
0x3B => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x0B>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
0x3C => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x0C>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
0x3D => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x0D>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
0x3E => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x0E>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
0x3F => _mm512_and_si512(_mm512_permutexvar_epi64(_mm512_set_epi64(5, 4, 3, 2, 1, 0, 7, 6), _mm512_bsrli_epi128::<0x0F>(vector)), _mm512_set_epi64(0, 0, 0, 0, 0, 0, -1, -1)),
// Shifting by 64 or more bytes clears the register entirely.
_ => _mm512_setzero_si512()
}
}