use super::*;
use std::mem::MaybeUninit;
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub struct GF2P64 {}
impl GF2P64 {
#[target_feature(enable = "avx2")]
#[cfg(target_feature = "avx2")]
unsafe fn convert_avx2(dst: &mut [<Self as Domain>::Sharing], src: &[<Self as Domain>::Batch]) {
assert!(dst.len() >= <Self as Domain>::Batch::DIMENSION);
macro_rules! pack {
($x:expr, $y:expr) => {
src.get_unchecked($x).0[$y] as i8
};
}
macro_rules! pack8x4 {
($x:expr, $y:expr) => {
_mm256_set_epi8(
pack!($x, $y),
pack!($x + 1, $y),
pack!($x + 2, $y),
pack!($x + 3, $y),
pack!($x + 4, $y),
pack!($x + 5, $y),
pack!($x + 6, $y),
pack!($x + 7, $y),
pack!($x, $y + 1),
pack!($x + 1, $y + 1),
pack!($x + 2, $y + 1),
pack!($x + 3, $y + 1),
pack!($x + 4, $y + 1),
pack!($x + 5, $y + 1),
pack!($x + 6, $y + 1),
pack!($x + 7, $y + 1),
pack!($x, $y + 2),
pack!($x + 1, $y + 2),
pack!($x + 2, $y + 2),
pack!($x + 3, $y + 2),
pack!($x + 4, $y + 2),
pack!($x + 5, $y + 2),
pack!($x + 6, $y + 2),
pack!($x + 7, $y + 2),
pack!($x, $y + 3),
pack!($x + 1, $y + 3),
pack!($x + 2, $y + 3),
pack!($x + 3, $y + 3),
pack!($x + 4, $y + 3),
pack!($x + 5, $y + 3),
pack!($x + 6, $y + 3),
pack!($x + 7, $y + 3),
)
};
}
for i in (0..BATCH_SIZE_BYTES).step_by(4) {
let mut v: [__m256i; 8] = [
pack8x4!(0x00, i),
pack8x4!(0x08, i),
pack8x4!(0x10, i),
pack8x4!(0x18, i),
pack8x4!(0x20, i),
pack8x4!(0x28, i),
pack8x4!(0x30, i),
pack8x4!(0x38, i),
];
let mut idx = i * 8;
for _ in 0..8 {
let mut res_0: [u8; 8] = [0u8; 8];
let mut res_1: [u8; 8] = [0u8; 8];
let mut res_2: [u8; 8] = [0u8; 8];
let mut res_3: [u8; 8] = [0u8; 8];
for i in 0..8 {
let mask = _mm256_movemask_epi8(v[i]);
res_0[i] = (mask >> 24) as u8;
res_1[i] = (mask >> 16) as u8;
res_2[i] = (mask >> 8) as u8;
res_3[i] = mask as u8;
v[i] = _mm256_add_epi8(v[i], v[i]);
}
dst[idx] = BitSharing64(u64::from_le_bytes(res_0));
dst[idx + 8] = BitSharing64(u64::from_le_bytes(res_1));
dst[idx + 16] = BitSharing64(u64::from_le_bytes(res_2));
dst[idx + 24] = BitSharing64(u64::from_le_bytes(res_3));
idx += 1;
}
}
}
#[target_feature(enable = "sse2")]
#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
unsafe fn convert_sse2(dst: &mut [<Self as Domain>::Sharing], src: &[<Self as Domain>::Batch]) {
assert!(dst.len() >= <Self as Domain>::Batch::DIMENSION);
macro_rules! pack {
($x:expr, $y:expr) => {
src.get_unchecked($x).0[$y] as i8
};
}
macro_rules! pack8x2 {
($x:expr, $y:expr) => {
_mm_set_epi8(
pack!($x, $y),
pack!($x + 1, $y),
pack!($x + 2, $y),
pack!($x + 3, $y),
pack!($x + 4, $y),
pack!($x + 5, $y),
pack!($x + 6, $y),
pack!($x + 7, $y),
pack!($x, $y + 1),
pack!($x + 1, $y + 1),
pack!($x + 2, $y + 1),
pack!($x + 3, $y + 1),
pack!($x + 4, $y + 1),
pack!($x + 5, $y + 1),
pack!($x + 6, $y + 1),
pack!($x + 7, $y + 1),
)
};
}
for i in (0..BATCH_SIZE_BYTES).step_by(2) {
let mut v: [__m128i; 8] = [
pack8x2!(0x00, i),
pack8x2!(0x08, i),
pack8x2!(0x10, i),
pack8x2!(0x18, i),
pack8x2!(0x20, i),
pack8x2!(0x28, i),
pack8x2!(0x30, i),
pack8x2!(0x38, i),
];
let mut idx = i * 8;
for _ in 0..8 {
let mut res_lo: [u8; 8] = [0u8; 8];
let mut res_hi: [u8; 8] = [0u8; 8];
for i in 0..8 {
let mask = _mm_movemask_epi8(v[i]);
res_lo[i] = (mask >> 8) as u8;
res_hi[i] = mask as u8;
v[i] = _mm_add_epi8(v[i], v[i]);
}
dst[idx] = BitSharing64(u64::from_le_bytes(res_lo));
dst[idx + 8] = BitSharing64(u64::from_le_bytes(res_hi));
idx += 1;
}
}
}
#[target_feature(enable = "avx2")]
#[cfg(target_feature = "avx2")]
unsafe fn convert_inv_avx2(
dst: &mut [<Self as Domain>::Batch],
src: &[<Self as Domain>::Sharing],
) {
#[allow(clippy::uninit_assumed_init)]
let mut sharings: [[u8; 8]; <Self as Domain>::Batch::DIMENSION] =
MaybeUninit::uninit().assume_init();
for i in 0..<Self as Domain>::Batch::DIMENSION {
sharings[i] = src[i].0.to_le_bytes();
}
macro_rules! pack {
( $x:expr, $y:expr ) => {
*sharings.get_unchecked($x).get_unchecked($y) as i8
};
}
macro_rules! pack8x4 {
( $x:expr, $y:expr ) => {
_mm256_set_epi8(
pack!($x, $y),
pack!($x + 1, $y),
pack!($x + 2, $y),
pack!($x + 3, $y),
pack!($x + 4, $y),
pack!($x + 5, $y),
pack!($x + 6, $y),
pack!($x + 7, $y),
pack!($x, $y + 1),
pack!($x + 1, $y + 1),
pack!($x + 2, $y + 1),
pack!($x + 3, $y + 1),
pack!($x + 4, $y + 1),
pack!($x + 5, $y + 1),
pack!($x + 6, $y + 1),
pack!($x + 7, $y + 1),
pack!($x, $y + 2),
pack!($x + 1, $y + 2),
pack!($x + 2, $y + 2),
pack!($x + 3, $y + 2),
pack!($x + 4, $y + 2),
pack!($x + 5, $y + 2),
pack!($x + 6, $y + 2),
pack!($x + 7, $y + 2),
pack!($x, $y + 3),
pack!($x + 1, $y + 3),
pack!($x + 2, $y + 3),
pack!($x + 3, $y + 3),
pack!($x + 4, $y + 3),
pack!($x + 5, $y + 3),
pack!($x + 6, $y + 3),
pack!($x + 7, $y + 3),
)
};
}
for i in (0..(<Self as Domain>::Sharing::DIMENSION / 8)).step_by(4) {
let mut v: [__m256i; 8] = [
pack8x4!(0x00, i),
pack8x4!(0x08, i),
pack8x4!(0x10, i),
pack8x4!(0x18, i),
pack8x4!(0x20, i),
pack8x4!(0x28, i),
pack8x4!(0x30, i),
pack8x4!(0x38, i),
];
let mut idx = i * 8;
for _ in 0..8 {
let masks: [_; 8] = [
_mm256_movemask_epi8(v[0]),
_mm256_movemask_epi8(v[1]),
_mm256_movemask_epi8(v[2]),
_mm256_movemask_epi8(v[3]),
_mm256_movemask_epi8(v[4]),
_mm256_movemask_epi8(v[5]),
_mm256_movemask_epi8(v[6]),
_mm256_movemask_epi8(v[7]),
];
dst[idx] = BitBatch([
(masks[0] >> 24) as u8,
(masks[1] >> 24) as u8,
(masks[2] >> 24) as u8,
(masks[3] >> 24) as u8,
(masks[4] >> 24) as u8,
(masks[5] >> 24) as u8,
(masks[6] >> 24) as u8,
(masks[7] >> 24) as u8,
]);
dst[idx + 8] = BitBatch([
(masks[0] >> 16) as u8,
(masks[1] >> 16) as u8,
(masks[2] >> 16) as u8,
(masks[3] >> 16) as u8,
(masks[4] >> 16) as u8,
(masks[5] >> 16) as u8,
(masks[6] >> 16) as u8,
(masks[7] >> 16) as u8,
]);
dst[idx + 16] = BitBatch([
(masks[0] >> 8) as u8,
(masks[1] >> 8) as u8,
(masks[2] >> 8) as u8,
(masks[3] >> 8) as u8,
(masks[4] >> 8) as u8,
(masks[5] >> 8) as u8,
(masks[6] >> 8) as u8,
(masks[7] >> 8) as u8,
]);
dst[idx + 24] = BitBatch([
masks[0] as u8,
masks[1] as u8,
masks[2] as u8,
masks[3] as u8,
masks[4] as u8,
masks[5] as u8,
masks[6] as u8,
masks[7] as u8,
]);
for i in 0..8 {
v[i] = _mm256_add_epi8(v[i], v[i]);
}
idx += 1;
}
}
}
#[target_feature(enable = "sse2")]
#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
unsafe fn convert_inv_sse2(
dst: &mut [<Self as Domain>::Batch],
src: &[<Self as Domain>::Sharing],
) {
#[allow(clippy::uninit_assumed_init)]
let mut sharings: [[u8; 8]; <Self as Domain>::Batch::DIMENSION] =
MaybeUninit::uninit().assume_init();
for i in 0..<Self as Domain>::Batch::DIMENSION {
sharings[i] = src[i].0.to_le_bytes();
}
macro_rules! pack {
( $x:expr, $y:expr ) => {
*sharings.get_unchecked($x).get_unchecked($y) as i8
};
}
macro_rules! pack8x2 {
( $x:expr, $y:expr ) => {
_mm_set_epi8(
pack!($x, $y),
pack!($x + 1, $y),
pack!($x + 2, $y),
pack!($x + 3, $y),
pack!($x + 4, $y),
pack!($x + 5, $y),
pack!($x + 6, $y),
pack!($x + 7, $y),
pack!($x, $y + 1),
pack!($x + 1, $y + 1),
pack!($x + 2, $y + 1),
pack!($x + 3, $y + 1),
pack!($x + 4, $y + 1),
pack!($x + 5, $y + 1),
pack!($x + 6, $y + 1),
pack!($x + 7, $y + 1),
)
};
}
for i in (0..(<Self as Domain>::Sharing::DIMENSION / 8)).step_by(2) {
let mut vecs: [__m128i; 8] = [
pack8x2!(0x00, i),
pack8x2!(0x08, i),
pack8x2!(0x10, i),
pack8x2!(0x18, i),
pack8x2!(0x20, i),
pack8x2!(0x28, i),
pack8x2!(0x30, i),
pack8x2!(0x38, i),
];
let mut idx = i * 8;
for _ in 0..8 {
let masks: [_; 8] = [
_mm_movemask_epi8(vecs[0]),
_mm_movemask_epi8(vecs[1]),
_mm_movemask_epi8(vecs[2]),
_mm_movemask_epi8(vecs[3]),
_mm_movemask_epi8(vecs[4]),
_mm_movemask_epi8(vecs[5]),
_mm_movemask_epi8(vecs[6]),
_mm_movemask_epi8(vecs[7]),
];
dst[idx] = BitBatch([
(masks[0] >> 8) as u8,
(masks[1] >> 8) as u8,
(masks[2] >> 8) as u8,
(masks[3] >> 8) as u8,
(masks[4] >> 8) as u8,
(masks[5] >> 8) as u8,
(masks[6] >> 8) as u8,
(masks[7] >> 8) as u8,
]);
dst[idx + 8] = BitBatch([
masks[0] as u8,
masks[1] as u8,
masks[2] as u8,
masks[3] as u8,
masks[4] as u8,
masks[5] as u8,
masks[6] as u8,
masks[7] as u8,
]);
for vec in &mut vecs {
*vec = _mm_add_epi8(*vec, *vec);
}
idx += 1;
}
}
}
}
impl Domain for GF2P64 {
type Scalar = BitScalar;
type Batch = BitBatch;
type Sharing = BitSharing64;
const PLAYERS: usize = 64;
const PREPROCESSING_REPETITIONS: usize = 631;
const ONLINE_REPETITIONS: usize = 23;
fn convert(dst: &mut [Self::Sharing], src: &[Self::Batch]) {
assert_eq!(src.len(), Self::PLAYERS);
#[cfg(target_feature = "avx2")]
return unsafe { Self::convert_avx2(dst, src) };
#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
return unsafe { Self::convert_sse2(dst, src) };
#[cfg(not(any(target_feature = "avx2", target_feature = "sse2")))]
compile_error!("unsupported platform: requires x86{-64} with SSE2 or AVX2");
}
fn convert_inv(dst: &mut [Self::Batch], src: &[Self::Sharing]) {
assert_eq!(src.len(), Self::Batch::DIMENSION);
assert_eq!(dst.len(), Self::Sharing::DIMENSION);
#[cfg(target_feature = "avx2")]
return unsafe { Self::convert_inv_avx2(dst, src) };
#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
return unsafe { Self::convert_inv_sse2(dst, src) };
#[cfg(not(any(target_feature = "avx2", target_feature = "sse2")))]
compile_error!("unsupported platform: requires x86{-64} with SSE2 or AVX2");
}
}