use crate::{
arch::*,
block::{Block, Zeroed},
};
/// Reduction constant shared by [`init`] and [`reduce`].
///
/// Two `u64` words reinterpreted as one 128-bit vector: the first word is `1`, the second is
/// `0xc2` in its top byte with every other bit clear. On little-endian x86 the first word lands
/// in the low 64-bit lane and the second in the high lane.
const POLYNOMIAL: __m128i = unsafe { core::mem::transmute([1_u64, 0xc2_u64 << 56]) };
/// Shifts `h` left by one bit as a single 128-bit value and conditionally folds in
/// [`POLYNOMIAL`].
///
/// The bit leaving the low 64-bit lane is carried into the high lane. When the most significant
/// bit of the original `h` is set, `POLYNOMIAL` is combined into the result with `xor`;
/// otherwise the shifted value is returned as is.
///
/// # Safety
///
/// Uses SSE2 intrinsics. Presumably the caller must guarantee the CPU supports them — confirm
/// against the call sites.
#[inline(always)]
pub unsafe fn init(h: __m128i) -> __m128i {
    // Broadcast the top 32-bit lane so its sign bit (bit 127 of `h`) can be tested everywhere.
    let top_word = _mm_shuffle_epi32(h, 0xff);
    // Bit 63 of each 64-bit lane, moved down to bit 0 of that lane.
    let lane_carries = _mm_srli_epi64(h, 63);
    let shifted = _mm_slli_epi64(h, 1);

    // Signed `0 > top_word` per lane: all ones when bit 127 of `h` was set, else all zeros
    // (assuming `zeroed()` yields the all-zero vector).
    let msb_mask = _mm_cmpgt_epi32(__m128i::zeroed(), top_word);

    // Byte-shift by 8 moves the low lane's carry into the high lane and drops the high one.
    let cross_carry = _mm_slli_si128(lane_carries, 8);
    let doubled = _mm_or_si128(shifted, cross_carry);

    doubled.xor(_mm_and_si128(msb_mask, POLYNOMIAL))
}
/// Carry-less multiplies `a` by `b` and reduces the 256-bit product with [`reduce`].
///
/// Four 64x64 carry-less products are formed (low*low, high*high and the two mixed ones). The
/// mixed products are combined and split across the low and high 128-bit halves of the result
/// before both halves are handed to `reduce`.
///
/// # Safety
///
/// Uses SSE2 and PCLMULQDQ intrinsics. Presumably the caller must guarantee the CPU supports
/// them — confirm against the call sites.
#[inline(always)]
pub unsafe fn gfmul(a: __m128i, b: __m128i) -> __m128i {
    // Immediate: bit 0 picks the 64-bit half of `a`, bit 4 picks the half of `b`.
    let low = _mm_clmulepi64_si128(a, b, 0x00);
    let high = _mm_clmulepi64_si128(a, b, 0x11);
    let middle = _mm_clmulepi64_si128(a, b, 0x10).xor(_mm_clmulepi64_si128(a, b, 0x01));

    // The middle term straddles the two halves: its low 64 bits go to the top of `low`,
    // its high 64 bits to the bottom of `high`.
    let low = low.xor(_mm_slli_si128(middle, 8));
    let high = high.xor(_mm_srli_si128(middle, 8));

    reduce(low, high)
}
/// Folds `tmp1` twice against [`POLYNOMIAL`] and combines the result with `tmp4`.
///
/// Each round carry-less multiplies the low 64 bits of the running value by the high word of
/// `POLYNOMIAL`, then `xor`s that product with the running value whose 64-bit halves have been
/// swapped. After two rounds the value is `xor`ed with `tmp4`. [`gfmul`] calls this with the
/// low 128 bits of its product as `tmp1` and the high 128 bits as `tmp4`.
///
/// # Safety
///
/// Uses SSE2 and PCLMULQDQ intrinsics. Presumably the caller must guarantee the CPU supports
/// them — confirm against the call sites.
#[inline(always)]
pub unsafe fn reduce(tmp1: __m128i, tmp4: __m128i) -> __m128i {
    // First round. Shuffle control 0b01_00_11_10 (= 78) swaps the two 64-bit halves.
    let product = _mm_clmulepi64_si128(tmp1, POLYNOMIAL, 0x10);
    let swapped = _mm_shuffle_epi32(tmp1, 0b01_00_11_10);
    let folded_once = product.xor(swapped);

    // Second round, identical to the first but applied to its output.
    let product = _mm_clmulepi64_si128(folded_once, POLYNOMIAL, 0x10);
    let swapped = _mm_shuffle_epi32(folded_once, 0b01_00_11_10);
    let folded_twice = product.xor(swapped);

    folded_twice.xor(tmp4)
}