use super::Block;
/// Low-order coefficients of the GF(2^128) reduction polynomial.
///
/// The set bits (7, 2, 1 and 0) encode `x^7 + x^2 + x + 1`, i.e. the value that
/// `x^128` is replaced with when a 256-bit carry-less product is reduced.
const MOD: u64 = 0b10000111;
// Generates the `target_feature_pclmulqdq` module; its `get()` is queried by the
// `Block` methods below to decide whether the `pclmulqdq` code path may be used.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
cpufeatures::new!(target_feature_pclmulqdq, "pclmulqdq");
impl Block {
    /// Carry-less (polynomial over GF(2)) multiplication of `self` and `rhs`.
    ///
    /// Returns the unreduced 256-bit product as `(low, high)` 128-bit halves.
    ///
    /// On x86/x86_64 the `pclmulqdq` implementation is used when
    /// `target_feature_pclmulqdq::get()` reports the feature as available; in every
    /// other case the portable scalar implementation is used.
    #[inline]
    pub fn clmul(&self, rhs: &Self) -> (Self, Self) {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            if target_feature_pclmulqdq::get() {
                // SAFETY: `clmul::clmul128` requires the `pclmulqdq` target feature,
                // which the check above just reported as available.
                let (low, high) = unsafe { clmul::clmul128(self.into(), rhs.into()) };
                return (low.into(), high.into());
            }
        }
        let (low, high) = scalar::clmul128(self.into(), rhs.into());
        (low.into(), high.into())
    }

    /// Multiplies `self` and `rhs` as elements of GF(2^128).
    ///
    /// This is a carry-less multiplication followed by a reduction of the 256-bit
    /// product. The backend is selected the same way as in [`Block::clmul`].
    #[inline]
    pub fn gf_mul(&self, rhs: &Self) -> Self {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            if target_feature_pclmulqdq::get() {
                // SAFETY: `clmul::gf128_mul` requires the `pclmulqdq` target feature,
                // which the check above just reported as available.
                let product = unsafe { clmul::gf128_mul(self.into(), rhs.into()) };
                return product.into();
            }
        }
        scalar::gf128_mul(self.into(), rhs.into()).into()
    }

    /// Reduces the 256-bit value given as `(low, high)` halves to a single
    /// GF(2^128) element.
    ///
    /// Intended for products returned by [`Block::clmul`]. The backend is selected
    /// the same way as in [`Block::clmul`].
    #[inline]
    pub fn gf_reduce(low: &Self, high: &Self) -> Self {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            if target_feature_pclmulqdq::get() {
                // SAFETY: `clmul::gf128_reduce` requires the `pclmulqdq` target
                // feature, which the check above just reported as available.
                let reduced = unsafe { clmul::gf128_reduce(low.into(), high.into()) };
                return reduced.into();
            }
        }
        scalar::gf128_reduce(low.into(), high.into()).into()
    }

    /// Raises `self` to the power `exp` in GF(2^128) using square-and-multiply.
    ///
    /// `gf_pow(0)` returns [`Block::ONE`].
    #[inline]
    pub fn gf_pow(&self, mut exp: u64) -> Block {
        let mut acc = Block::ONE;
        let mut base = *self;
        while exp != 0 {
            if exp & 1 != 0 {
                acc = acc.gf_mul(&base);
            }
            exp >>= 1;
            // Only square when another exponent bit remains; the square computed
            // after the final bit would never be read.
            if exp != 0 {
                base = base.gf_mul(&base);
            }
        }
        acc
    }
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod clmul {
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use super::MOD;
/// Multiplies `a` and `b` in GF(2^128): a 128x128 carry-less multiplication
/// followed by a reduction of the 256-bit product.
#[target_feature(enable = "pclmulqdq")]
#[inline]
pub fn gf128_mul(a: __m128i, b: __m128i) -> __m128i {
    let (product_low, product_high) = clmul128(a, b);
    gf128_reduce(product_low, product_high)
}
/// Carry-less multiplication of two 128-bit values.
///
/// Returns the 256-bit product as `(low, high)` 128-bit halves.
#[target_feature(enable = "pclmulqdq")]
#[inline]
pub fn clmul128(a: __m128i, b: __m128i) -> (__m128i, __m128i) {
    // Schoolbook multiplication on 64-bit lanes. The immediate selects the lanes:
    // bit 0 picks the lane of `a`, bit 4 picks the lane of `b`.
    let lo_lo = _mm_clmulepi64_si128::<0x00>(a, b);
    let hi_hi = _mm_clmulepi64_si128::<0x11>(a, b);
    // Both cross terms carry the weight x^64, so they can be summed first.
    let cross = _mm_xor_si128(
        _mm_clmulepi64_si128::<0x01>(a, b),
        _mm_clmulepi64_si128::<0x10>(a, b),
    );
    // The byte shifts move the cross term's lower half into the upper lane of the
    // low result and its upper half into the lower lane of the high result.
    (
        _mm_xor_si128(lo_lo, _mm_slli_si128::<8>(cross)),
        _mm_xor_si128(hi_hi, _mm_srli_si128::<8>(cross)),
    )
}
/// Reduces the 256-bit value `(low, high)` to a single GF(2^128) element by
/// replacing every multiple of `x^128` with the corresponding multiple of `MOD`.
#[target_feature(enable = "pclmulqdq")]
#[inline]
pub fn gf128_reduce(low: __m128i, high: __m128i) -> __m128i {
    // `MOD` in the lower lane, zero in the upper lane. `MOD` is a small positive
    // constant, so the `as i64` cast is lossless.
    let modulus = _mm_set_epi64x(0, MOD as i64);

    // Upper lane of `high` (weight x^192) times MOD: the result has weight x^64
    // and can be wider than 64 bits.
    let upper_fold = _mm_clmulepi64_si128::<0x01>(high, modulus);
    // Its lower 64 bits land in the upper lane of the result ...
    let mut acc = _mm_xor_si128(low, _mm_slli_si128::<8>(upper_fold));
    // ... while the bits that spilled past x^128 are folded once more.
    let spill_fold = _mm_clmulepi64_si128::<0x01>(upper_fold, modulus);
    acc = _mm_xor_si128(acc, spill_fold);

    // Lower lane of `high` (weight x^128) times MOD fits into 128 bits directly.
    let lower_fold = _mm_clmulepi64_si128::<0x00>(high, modulus);
    _mm_xor_si128(acc, lower_fold)
}
/// Known-answer tests for the `pclmulqdq` backend.
///
/// Only compiled when `pclmulqdq` is statically enabled, so the intrinsics can be
/// called without a runtime check.
#[cfg(all(test, target_feature = "pclmulqdq"))]
mod test {
    use std::mem::transmute;

    // `__m128i` is taken from the parent's architecture-specific glob import so
    // that this module builds on both x86 and x86_64.
    use super::{__m128i, clmul128, gf128_mul, gf128_reduce};

    #[test]
    fn test_gf128_mul_zero() {
        // SAFETY: `u128` and `__m128i` are both 16 bytes with no invalid bit
        // patterns, and `pclmulqdq` is statically enabled for this module.
        unsafe {
            let a = transmute(0x19831239123916248127031273012381_u128);
            let b = transmute(0_u128);
            let exp = 0_u128;
            let mul = transmute(gf128_mul(a, b));
            assert_eq!(exp, mul);
        }
    }

    #[test]
    fn test_gf128_mul_one() {
        // SAFETY: see `test_gf128_mul_zero`.
        unsafe {
            let a = transmute(0x19831239123916248127031273012381_u128);
            let b = transmute(0x1_u128);
            let exp = 0x19831239123916248127031273012381_u128;
            let mul = transmute(gf128_mul(a, b));
            assert_eq!(exp, mul);
        }
    }

    #[test]
    fn test_gf128_mul() {
        // SAFETY: see `test_gf128_mul_zero`.
        unsafe {
            let a = transmute(0x19831239123916248127031273012381_u128);
            let b = transmute(0xabcdef0123456789abcdef0123456789_u128);
            let exp = 0x63a033d0ed643e85153c50f4268a7d9_u128;
            let mul = transmute(gf128_mul(a, b));
            assert_eq!(exp, mul);
        }
    }

    #[test]
    fn test_clmul128() {
        // SAFETY: see `test_gf128_mul_zero`.
        unsafe {
            let a: __m128i = transmute(0x19831239123916248127031273012381_u128);
            let b: __m128i = transmute(0xabcdef0123456789abcdef0123456789_u128);
            let (low, high) = clmul128(a, b);
            let [low, high] = transmute([low, high]);
            let exp_low: u128 = 0xa5de9b50e6db7b5147e92b99ee261809;
            let exp_high: u128 = 0xf1d6d37d58114afed2addfedd7c77f7;
            assert_eq!(exp_low, low);
            assert_eq!(exp_high, high);
        }
    }

    #[test]
    fn test_gf128_reduce() {
        // SAFETY: see `test_gf128_mul_zero`.
        unsafe {
            let low: __m128i = transmute(0x0123456789abcdef0123456789abcdef_u128);
            let high: __m128i = transmute(0xabcdef0123456789abcdef0123456789_u128);
            let exp = 0xb4b548f1c3c23f86b4b548f1c3c21572_u128;
            let res: u128 = transmute(gf128_reduce(low, high));
            // Binary dumps make a failing bit easier to spot than the decimal
            // output of `assert_eq!`.
            println!("res: {res:b}");
            println!("exp: {exp:b}");
            assert_eq!(exp, res);
        }
    }
}
/// Nightly-only micro-benchmarks for the `pclmulqdq` backend.
#[cfg(all(is_nightly, test, target_feature = "pclmulqdq"))]
mod benches {
    extern crate test;
    use std::{hint::black_box, mem::transmute};

    use rand::{RngExt, rng};
    use test::Bencher;

    #[bench]
    fn bench_gf128_mul(b: &mut Bencher) {
        let operands: [u128; 2] = rng().random();
        // SAFETY: `u128` and the SIMD vector type are both 16 bytes with no invalid
        // bit patterns.
        let [lhs, rhs] = unsafe { transmute(operands) };
        b.iter(|| {
            // SAFETY: `pclmulqdq` is statically enabled for this module.
            let product = unsafe { super::gf128_mul(black_box(lhs), black_box(rhs)) };
            black_box(product)
        });
    }

    #[bench]
    fn bench_gf128_reduce(b: &mut Bencher) {
        let halves: [u128; 2] = rng().random();
        // SAFETY: `u128` and the SIMD vector type are both 16 bytes with no invalid
        // bit patterns.
        let [low, high] = unsafe { transmute(halves) };
        b.iter(|| {
            // SAFETY: `pclmulqdq` is statically enabled for this module.
            let reduced = unsafe { super::gf128_reduce(black_box(low), black_box(high)) };
            black_box(reduced)
        });
    }
}
}
#[allow(dead_code)]
mod scalar {
/// Multiplies `a` and `b` in GF(2^128): a 128x128 carry-less multiplication
/// followed by a reduction of the 256-bit product.
#[inline]
pub fn gf128_mul(a: u128, b: u128) -> u128 {
    let (product_low, product_high) = clmul128(a, b);
    gf128_reduce(product_low, product_high)
}
/// Carry-less multiplication of two 128-bit values.
///
/// Returns the 256-bit product as `(low, high)` 128-bit halves.
#[inline]
pub fn clmul128(a: u128, b: u128) -> (u128, u128) {
    // Split both operands into 64-bit limbs; the `as u64` casts truncate on purpose.
    let [a0, a1] = [a as u64, (a >> 64) as u64];
    let [b0, b1] = [b as u64, (b >> 64) as u64];

    let low_product = clmul64(a0, b0);
    let high_product = clmul64(a1, b1);
    // Karatsuba: one extra multiplication yields the sum of both cross terms.
    let cross = clmul64(a0 ^ a1, b0 ^ b1) ^ low_product ^ high_product;

    // The cross term has weight x^64, so it straddles both result halves.
    (
        low_product ^ (cross << 64),
        high_product ^ (cross >> 64),
    )
}
/// Carry-less multiplication of two 64-bit values into a 128-bit product,
/// built from ordinary integer multiplications.
///
/// Each operand is split into five slices that keep only every fifth bit. Inside a
/// slice there are four zero bits between neighbouring data bits, so the carries
/// produced by integer multiplication stay inside those gaps and are masked away
/// afterwards.
#[inline]
fn clmul64(x: u64, y: u64) -> u128 {
    /// Mask with bits `start`, `start + 5`, `start + 10`, ... set.
    const fn every_fifth_bit(start: u32) -> u128 {
        let mut bits: u128 = 0;
        let mut pos = start;
        while pos < 128 {
            bits |= 1u128 << pos;
            pos += 5;
        }
        bits
    }
    const M0: u128 = every_fifth_bit(0);
    const M1: u128 = every_fifth_bit(1);
    const M2: u128 = every_fifth_bit(2);
    const M3: u128 = every_fifth_bit(3);
    const M4: u128 = every_fifth_bit(4);

    let (x, y) = (u128::from(x), u128::from(y));
    let (x0, x1, x2, x3, x4) = (x & M0, x & M1, x & M2, x & M3, x & M4);
    let (y0, y1, y2, y3, y4) = (y & M0, y & M1, y & M2, y & M3, y & M4);

    // Result slice `k` collects every product `x_i * y_j` with `(i + j) % 5 == k`;
    // the mask drops the carry bits that accumulated in the gaps.
    let z0 = ((x0 * y0) ^ (x1 * y4) ^ (x2 * y3) ^ (x3 * y2) ^ (x4 * y1)) & M0;
    let z1 = ((x0 * y1) ^ (x1 * y0) ^ (x2 * y4) ^ (x3 * y3) ^ (x4 * y2)) & M1;
    let z2 = ((x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y4) ^ (x4 * y3)) & M2;
    let z3 = ((x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0) ^ (x4 * y4)) & M3;
    let z4 = ((x0 * y4) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1) ^ (x4 * y0)) & M4;

    z0 | z1 | z2 | z3 | z4
}
/// Reduces the 256-bit value `(low, high)` to a single GF(2^128) element by
/// replacing `x^128` with `x^7 + x^2 + x + 1`.
#[inline]
pub fn gf128_reduce(low: u128, high: u128) -> u128 {
    // Non-constant exponents of the replacement polynomial; the constant term
    // (shift by 0) is handled by the initial value of the accumulators.
    const TAPS: [u32; 3] = [7, 2, 1];

    // Multiply `high` by the polynomial: `folded` keeps the bits that stay below
    // x^128, `spill` collects the bits shifted out past it.
    let mut folded = high;
    let mut spill = 0u128;
    for shift in TAPS {
        folded ^= high << shift;
        spill ^= high >> (128 - shift);
    }

    // The spill is only a few bits wide, so folding it once more cannot overflow.
    let mut spill_folded = spill;
    for shift in TAPS {
        spill_folded ^= spill << shift;
    }

    low ^ folded ^ spill_folded
}
/// Known-answer tests for the portable scalar backend.
#[cfg(test)]
mod tests {
    use super::{clmul128, gf128_mul, gf128_reduce};

    #[test]
    fn test_gf128_mul_zero() {
        let product = gf128_mul(0x19831239123916248127031273012381, 0);
        assert_eq!(0, product);
    }

    #[test]
    fn test_gf128_mul_one() {
        let operand = 0x19831239123916248127031273012381;
        assert_eq!(operand, gf128_mul(operand, 1));
    }

    #[test]
    fn test_gf128_mul() {
        let product = gf128_mul(
            0x19831239123916248127031273012381,
            0xabcdef0123456789abcdef0123456789,
        );
        assert_eq!(0x63a033d0ed643e85153c50f4268a7d9, product);
    }

    #[test]
    fn test_gf128_reduce_zero() {
        assert_eq!(gf128_reduce(0, 0), 0);
    }

    /// With an empty high half there is nothing to fold, so `low` passes through.
    #[test]
    fn test_gf128_reduce_low_only() {
        for low in [1, 0x87, 0xFFFFFFFFFFFFFFFF] {
            assert_eq!(gf128_reduce(low, 0), low);
        }
    }

    /// Each bit of the high half contributes a shifted copy of 0x87.
    #[test]
    fn test_gf128_reduce_high_only() {
        let cases: [(u128, u128); 4] = [
            (1, 0x87),
            (2, 0x87 << 1),
            (3, (0x87 << 1) ^ 0x87),
            (1 << 63, 0x87 << 63),
        ];
        for (high, expected) in cases {
            assert_eq!(gf128_reduce(0, high), expected);
        }
    }

    /// All-ones input exercises the bits that spill past x^128 during folding.
    #[test]
    fn test_gf128_reduce_overflow() {
        let reduced = gf128_reduce(u128::MAX, u128::MAX);
        assert_eq!(reduced, 0xffffffffffffffffffffffffffffc071);
    }

    #[test]
    fn test_gf128_reduce() {
        let exp = 0xb4b548f1c3c23f86b4b548f1c3c21572;
        let res = gf128_reduce(
            0x0123456789abcdef0123456789abcdef,
            0xabcdef0123456789abcdef0123456789,
        );
        // Binary dumps make a failing bit easier to spot than the decimal output
        // of `assert_eq!`.
        println!("res: {res:b}");
        println!("exp: {exp:b}");
        assert_eq!(exp, res);
    }

    #[test]
    fn test_clmul128() {
        let (low, high) = clmul128(
            0x19831239123916248127031273012381,
            0xabcdef0123456789abcdef0123456789,
        );
        assert_eq!(0xa5de9b50e6db7b5147e92b99ee261809, low);
        assert_eq!(0xf1d6d37d58114afed2addfedd7c77f7, high);
    }
}
/// Nightly-only micro-benchmarks for the portable scalar backend.
#[cfg(all(is_nightly, test))]
mod benches {
    extern crate test;
    // The standard-library `black_box` is the one the `pclmulqdq` benches in this
    // file use; it needs no third-party crate.
    use std::hint::black_box;

    use rand::{RngExt, rng};
    use test::Bencher;

    #[bench]
    fn bench_gf128_mul(b: &mut Bencher) {
        let [lhs, rhs] = rng().random::<[u128; 2]>();
        b.iter(|| black_box(super::gf128_mul(black_box(lhs), black_box(rhs))));
    }

    #[bench]
    fn bench_gf128_reduce(b: &mut Bencher) {
        let [low, high] = rng().random::<[u128; 2]>();
        b.iter(|| black_box(super::gf128_reduce(black_box(low), black_box(high))));
    }
}
}
/// Property tests checking that the `pclmulqdq` and scalar backends agree.
///
/// Only compiled when `pclmulqdq` is statically enabled, so the intrinsics can be
/// called without a runtime check.
#[cfg(all(test, not(miri), target_feature = "pclmulqdq"))]
mod scalar_simd_tests {
    use std::mem::transmute;

    use proptest::prelude::*;

    use super::{clmul, scalar};

    /// Arbitrary `u128` values, with the boundary values 0, 1, `MAX - 1` and `MAX`
    /// each given an explicit weight next to the fully random case.
    fn u128_with_edges() -> impl Strategy<Value = u128> {
        prop_oneof![
            1 => Just(0u128),
            1 => Just(1u128),
            1 => Just(u128::MAX),
            1 => Just(u128::MAX - 1),
            6 => any::<u128>(),
        ]
    }

    proptest! {
        #[test]
        fn test_clmul128(a in u128_with_edges(), b in u128_with_edges()) {
            // SAFETY: `u128` and the SIMD vector type are both 16 bytes with no
            // invalid bit patterns, and `pclmulqdq` is statically enabled here.
            unsafe {
                let (simd_low, simd_high) = clmul::clmul128(transmute(a), transmute(b));
                let (scalar_low, scalar_high) = scalar::clmul128(a, b);
                // Compare both halves of the 256-bit product, not just the low one.
                assert_eq!(scalar_low, transmute::<_, u128>(simd_low));
                assert_eq!(scalar_high, transmute::<_, u128>(simd_high));
            }
        }
    }

    proptest! {
        #[test]
        fn test_gf128_reduce(a in u128_with_edges(), b in u128_with_edges()) {
            // SAFETY: see `test_clmul128`.
            unsafe {
                let clmul_res = clmul::gf128_reduce(transmute(a), transmute(b));
                let scalar_res = scalar::gf128_reduce(a, b);
                assert_eq!(scalar_res, transmute(clmul_res));
            }
        }
    }

    proptest! {
        #[test]
        fn test_gf128_mul(a in u128_with_edges(), b in u128_with_edges()) {
            // SAFETY: see `test_clmul128`.
            unsafe {
                let clmul_res = clmul::gf128_mul(transmute(a), transmute(b));
                let scalar_res = scalar::gf128_mul(a, b);
                assert_eq!(scalar_res, transmute(clmul_res));
            }
        }
    }
}
/// Tests for the `Block`-level field arithmetic.
#[cfg(test)]
mod tests {
    use crate::Block;

    /// `gf_pow` must agree with repeated `gf_mul` for the first few exponents.
    #[test]
    fn test_gf_pow() {
        let base: Block = 24646523424323_u128.into();
        let squared = base.gf_mul(&base);
        let cubed = base.gf_mul(&squared);

        assert_eq!(Block::ONE, base.gf_pow(0));
        assert_eq!(base, base.gf_pow(1));
        assert_eq!(squared, base.gf_pow(2));
        assert_eq!(cubed, base.gf_pow(3));
    }
}