use std::ops;
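/// The reduction polynomial for GF(2^16): x^16 + x^12 + x^3 + x + 1.
/// The degree-16 bit is included, so the constant needs a u32.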
pub const POLY: u32 = 0x1100b;
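/// An element of GF(2^16). Bit i of `value` is the coefficient of x^i
/// in a polynomial of degree < 16 over GF(2).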
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
#[repr(transparent)]
#[hax_lib::fstar::after(
r#"
let to_gf (s: t_GF16) = Spec.GF16.to_bv s.f_value
"#
)]
pub struct GF16 {
pub value: u16,
}
#[hax_lib::attributes]
impl ops::AddAssign<&GF16> for GF16 {
#[allow(clippy::suspicious_op_assign_impl)]
#[requires(true)]
#[hax_lib::ensures(|result| fstar!(r#"
to_gf self_e_future ==
Spec.GF16.gf_add (to_gf self_) (to_gf other)
"#))]
fn add_assign(&mut self, other: &Self) {
hax_lib::fstar!("Spec.GF16.xor_is_gf_add_lemma self.f_value other.f_value");
self.value ^= other.value;
}
}
#[hax_lib::attributes]
impl ops::AddAssign for GF16 {
#[hax_lib::ensures(|result| fstar!(r#"
to_gf self_e_future ==
Spec.GF16.gf_add (to_gf self_) (to_gf other)
"#))]
fn add_assign(&mut self, other: Self) {
hax_lib::fstar!("Spec.GF16.xor_is_gf_add_lemma self.f_value other.f_value");
self.add_assign(&other);
}
}
#[hax_lib::attributes]
impl ops::Add for GF16 {
type Output = Self;
#[hax_lib::ensures(|result| fstar!(r#"
to_gf result ==
Spec.GF16.gf_add (to_gf self_) (to_gf other)
"#))]
fn add(self, other: Self) -> Self {
let mut out = self;
out += &other;
out
}
}
#[hax_lib::attributes]
impl ops::Add<&GF16> for GF16 {
type Output = Self;
#[hax_lib::ensures(|result| fstar!(r#"
to_gf result ==
Spec.GF16.gf_add (to_gf self_) (to_gf other)
"#))]
fn add(self, other: &Self) -> Self {
let mut out = self;
out += other;
out
}
}
#[hax_lib::attributes]
impl ops::SubAssign<&GF16> for GF16 {
#[allow(clippy::suspicious_op_assign_impl)]
#[hax_lib::ensures(|result| fstar!(r#"
to_gf self_e_future ==
Spec.GF16.gf_sub (to_gf self_) (to_gf other)
"#))]
fn sub_assign(&mut self, other: &Self) {
*self += other;
}
}
#[hax_lib::attributes]
impl ops::SubAssign for GF16 {
#[hax_lib::ensures(|result| fstar!(r#"
to_gf self_e_future ==
Spec.GF16.gf_sub (to_gf self_) (to_gf other)
"#))]
fn sub_assign(&mut self, other: Self) {
self.sub_assign(&other);
}
}
#[hax_lib::attributes]
impl ops::Sub for GF16 {
type Output = Self;
#[hax_lib::ensures(|result| fstar!(r#"
to_gf result ==
Spec.GF16.gf_sub (to_gf self_) (to_gf other)
"#))]
fn sub(self, other: Self) -> Self {
let mut out = self;
out -= &other;
out
}
}
#[hax_lib::attributes]
impl ops::Sub<&GF16> for GF16 {
type Output = Self;
#[hax_lib::ensures(|result| fstar!(r#"
to_gf result ==
Spec.GF16.gf_sub (to_gf self_) (to_gf other)
"#))]
fn sub(self, other: &Self) -> Self {
let mut out = self;
out -= other;
out
}
}
#[hax_lib::attributes]
impl ops::MulAssign<&GF16> for GF16 {
fn mul_assign(&mut self, other: &Self) {
#[cfg(all(
not(hax),
any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")
))]
if check_accelerated::TOKEN.get() {
self.value = accelerated::mul(self.value, other.value);
return;
}
self.value = unaccelerated::mul(self.value, other.value);
}
}
#[hax_lib::attributes]
impl ops::MulAssign for GF16 {
fn mul_assign(&mut self, other: Self) {
self.mul_assign(&other);
}
}
#[hax_lib::attributes]
impl ops::Mul for GF16 {
type Output = Self;
fn mul(self, other: Self) -> Self {
let mut out = self;
out *= &other;
out
}
}
#[hax_lib::attributes]
impl ops::Mul<&GF16> for GF16 {
type Output = Self;
fn mul(self, other: &Self) -> Self {
let mut out = self;
out *= other;
out
}
}
#[hax_lib::attributes]
impl ops::DivAssign<&GF16> for GF16 {
#[allow(clippy::suspicious_op_assign_impl)]
fn div_assign(&mut self, other: &Self) {
*self = self.div_impl(other);
}
}
#[hax_lib::attributes]
impl ops::DivAssign for GF16 {
fn div_assign(&mut self, other: Self) {
*self = self.div_impl(&other);
}
}
#[hax_lib::attributes]
impl ops::Div for GF16 {
type Output = Self;
fn div(self, other: Self) -> Self {
self.div_impl(&other)
}
}
#[hax_lib::attributes]
impl ops::Div<&GF16> for GF16 {
type Output = Self;
fn div(self, other: &Self) -> Self {
self.div_impl(other)
}
}
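/// Multiplies each element of `into` by `a` in place, two elements at
/// a time so that accelerated backends can share a single carry-less
/// multiply per pair; an odd final element is handled separately.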
#[inline]
#[hax_lib::opaque]
pub fn parallel_mult(a: GF16, into: &mut [GF16]) {
let mut i = 0;
while i + 2 <= into.len() {
hax_lib::loop_decreases!(into.len() - i);
(into[i].value, into[i + 1].value) = mul2_u16(a.value, into[i].value, into[i + 1].value);
i += 2;
}
if i < into.len() {
into[i] *= a;
}
}
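/// Multiplies `a` by `b1` and by `b2` in GF(2^16), dispatching to the
/// hardware carry-less-multiply backend when the CPU supports it.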
#[hax_lib::opaque]
fn mul2_u16(a: u16, b1: u16, b2: u16) -> (u16, u16) {
#[cfg(all(
not(hax),
any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")
))]
if check_accelerated::TOKEN.get() {
return accelerated::mul2(a, b1, b2);
}
(unaccelerated::mul(a, b1), unaccelerated::mul(a, b2))
}
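/// Carry-less multiplication via the x86 PCLMULQDQ instruction.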
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod accelerated {
#[cfg(target_arch = "x86")]
use core::arch::x86 as arch;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64 as arch;
pub fn mul(a: u16, b: u16) -> u16 {
mul2(a, b, 0).0
}
#[inline]
#[target_feature(enable = "pclmulqdq")]
unsafe fn mul2_unreduced(a: u16, b1: u16, b2: u16) -> (u32, u32) {
// b1 occupies bits 0..16 and b2 bits 32..48, so one carry-less multiply
// by the 16-bit `a` yields both 31-bit products without overlap.
let a = arch::_mm_set_epi64x(0, a as i64);
let b = arch::_mm_set_epi64x(0, ((b2 as i64) << 32) | (b1 as i64));
let clmul = arch::_mm_clmulepi64_si128::<0>(a, b);
let b1out = arch::_mm_cvtsi128_si32(clmul) as u32;
let b2out = arch::_mm_cvtsi128_si32(arch::_mm_srli_si128::<4>(clmul)) as u32;
(b1out, b2out)
}
pub fn mul2(a: u16, b1: u16, b2: u16) -> (u16, u16) {
let unreduced_products = unsafe { mul2_unreduced(a, b1, b2) };
(
super::reduce::poly_reduce(unreduced_products.0),
super::reduce::poly_reduce(unreduced_products.1),
)
}
}
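/// Carry-less multiplication via the AArch64 PMULL instruction.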
#[cfg(target_arch = "aarch64")]
mod accelerated {
use core::arch::aarch64;
pub fn mul(a: u16, b: u16) -> u16 {
mul2(a, b, 0).0
}
#[inline]
#[target_feature(enable = "neon,aes")]
unsafe fn mul2_unreduced(a: u16, b1: u16, b2: u16) -> u128 {
// PMULL (`vmull_p64`) is gated on the "aes" target feature, matching
// the runtime "aes" check in `check_accelerated`.
aarch64::vmull_p64(a as u64, ((b1 as u64) << 32) | (b2 as u64))
}
pub fn mul2(a: u16, b1: u16, b2: u16) -> (u16, u16) {
let unreduced_product = unsafe { mul2_unreduced(a, b1, b2) };
(
super::reduce::poly_reduce((unreduced_product >> 32) as u32),
super::reduce::poly_reduce(unreduced_product as u32),
)
}
}
#[cfg(all(
not(hax),
any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")
))]
mod check_accelerated {
#[cfg(target_arch = "aarch64")]
cpufeatures::new!(use_accelerated, "aes");
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
cpufeatures::new!(use_accelerated, "pclmulqdq");
use std::sync::LazyLock;
pub(crate) static TOKEN: LazyLock<use_accelerated::InitToken> =
LazyLock::new(use_accelerated::init);
}
mod unaccelerated {
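/// Schoolbook carry-less multiplication: XOR in a shifted copy of `a`
/// for every set bit of `b`, yielding an unreduced product of degree
/// at most 30.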
#[hax_lib::fstar::options("--fuel 2")]
#[hax_lib::ensures(|result| fstar!(r#"
let open Spec.GF16 in
to_bv result ==
poly_mul (to_bv a) (to_bv b)"#))]
const fn poly_mul(a: u16, b: u16) -> u32 {
let mut acc: u32 = 0;
let me = a as u32;
let mut shift: u32 = 0;
hax_lib::fstar!(
r#"
let open Spec.GF16 in
assert_norm Spec.GF16.(poly_mul_i (to_bv a) (to_bv b) 0 == zero #32);
zero_lemma #U32;
up_cast_lemma #U16 #U32 a
"#
);
while shift < 16 {
hax_lib::loop_invariant!(fstar!(
r#"
let open Spec.GF16 in
v shift <= 16 /\
to_bv acc == poly_mul_i (to_bv a) (to_bv b) (v shift)
"#
));
hax_lib::loop_decreases!(16 - shift);
hax_lib::fstar!(
r#"
let open Spec.GF16 in
shift_left_bit_select_lemma b shift;
up_cast_shift_left_lemma a shift;
lemma_add_lift #(16+v shift) #32 (poly_mul_x_k (to_bv a) (v shift)) (to_bv acc);
xor_is_gf_add_lemma acc (me <<! shift)
"#
);
if 0 != b & (1 << shift) {
hax_lib::fstar!(
r#"
let open Spec.GF16 in
assert ((to_bv b).[v shift] == true);
assert (poly_mul_i (to_bv a) (to_bv b) (v shift + 1) ==
gf_add (poly_mul_i (to_bv a) (to_bv b) (v shift))
(poly_mul_x_k (to_bv a) (v shift)))
"#
);
acc ^= me << shift;
}
shift += 1;
}
acc
}
#[allow(dead_code)]
#[hax_lib::ensures(|result| fstar!(r#"
let open Spec.GF16 in
let (r1, r2) = result in
to_bv r1 == gf16_mul (to_bv a) (to_bv b1) /\
to_bv r2 == gf16_mul (to_bv a) (to_bv b2)
"#))]
pub fn mul2(a: u16, b1: u16, b2: u16) -> (u16, u16) {
(mul(a, b1), mul(a, b2))
}
#[hax_lib::ensures(|result| fstar!(r#"
let open Spec.GF16 in
to_bv result ==
gf16_mul (to_bv a) (to_bv b)"#))]
pub const fn mul(a: u16, b: u16) -> u16 {
super::reduce::poly_reduce(poly_mul(a, b))
}
}
mod reduce {
use super::POLY;
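/// Reduces an unreduced carry-less product modulo `POLY`, clearing
/// the high half one byte at a time via the precomputed table.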
#[hax_lib::fstar::verification_status(panic_free)]
#[hax_lib::ensures(|result| fstar!(r#"
Spec.GF16.(to_bv result == poly_reduce #gf16 (to_bv v))
"#))]
pub const fn poly_reduce(v: u32) -> u16 {
let mut v = v;
let i1 = (v >> 24) as usize;
v ^= (REDUCE_BYTES[i1] as u32) << 8;
let shifted_v = (v >> 16) as usize;
let i2 = shifted_v & 0xFF;
hax_lib::fstar!("logand_lemma $shifted_v (mk_usize 255)");
v ^= REDUCE_BYTES[i2] as u32;
v as u16
}
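/// Returns the XOR of the shifted copies of `POLY` that cancel byte
/// `a` when it sits at bits 16..24; the low 16 bits of the result are
/// the table entry stored for `a`.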
const fn reduce_from_byte(mut a: u8) -> u32 {
let mut out = 0u32;
let mut i: u32 = 8;
while i > 0 {
hax_lib::loop_invariant!(i <= 8);
hax_lib::loop_decreases!(i);
i -= 1;
if (1 << i) & a != 0 {
out ^= POLY << i;
a ^= ((POLY << i) >> 16) as u8;
}
}
out
}
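/// Builds the 256-entry byte-reduction table at compile time.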
const fn reduce_bytes() -> [u16; 256] {
let mut out = [0u16; 256];
let mut i = 0;
while i < 256 {
hax_lib::loop_invariant!(hax_lib::prop::constructors::and(
(i <= 256).into(),
hax_lib::forall(|j: usize| hax_lib::implies(
j < i,
out[j] == (reduce_from_byte(j as u8) as u16)
))
));
hax_lib::loop_decreases!(256 - i);
out[i] = reduce_from_byte(i as u8) as u16;
i += 1;
}
out
}
const REDUCE_BYTES: [u16; 256] = reduce_bytes();
}
impl GF16 {
pub const ZERO: Self = Self { value: 0 };
pub const ONE: Self = Self { value: 1 };
pub fn new(value: u16) -> Self {
Self { value }
}
pub fn inv(&self) -> GF16 {
GF16::ONE.div_impl(self)
}
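/// Computes `self / other` as `self * other^(2^16 - 2)`, i.e. by
/// Fermat inversion: the exponent 65534 = 2^1 + ... + 2^15, so fifteen
/// square-and-multiply steps suffice. Dividing by zero yields zero.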
fn div_impl(&self, other: &Self) -> Self {
let mut square = *other * *other;
let mut out = *self;
for _i in 1..16 {
(square.value, out.value) = mul2_u16(square.value, square.value, out.value);
}
out
}
pub const fn const_mul(&self, other: &Self) -> Self {
Self {
value: unaccelerated::mul(self.value, other.value),
}
}
pub const fn const_sub(&self, other: &Self) -> Self {
Self {
value: self.value ^ other.value,
}
}
pub const fn const_add(&self, other: &Self) -> Self {
Self {
value: self.value ^ other.value,
}
}
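/// Const-evaluable counterpart of `div_impl`: the same
/// square-and-multiply chain, built on the table-driven multiplier.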
pub const fn const_div(&self, other: &Self) -> Self {
let mut square = *other;
let mut out = *self;
{
let mut i: usize = 1;
while i < 16 {
hax_lib::loop_invariant!(i <= 16);
hax_lib::loop_decreases!(16 - i);
square = square.const_mul(&square);
out = out.const_mul(&square);
i += 1;
}
}
out
}
pub const fn const_inv(&self) -> GF16 {
GF16::ONE.const_div(self)
}
}
#[cfg(test)]
mod test {
use super::*;
use galois_field_2pm::gf2::GFu16;
use galois_field_2pm::GaloisField;
use rand::RngCore;
type ExternalGF16 = GFu16<{ POLY as u128 }>;
#[test]
fn add() {
let mut rng = rand::rng();
for _i in 0..100 {
let x = rng.next_u32() as u16;
let y = rng.next_u32() as u16;
assert_eq!(
(GF16 { value: x } + GF16 { value: y }).value,
(ExternalGF16::new(x) + ExternalGF16::new(y)).value
);
}
}
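// In characteristic 2, subtraction coincides with addition, which is
// why `sub_assign` above just delegates to `+=`. A quick sanity check
// of that equivalence:
#[test]
fn sub_is_add() {
let mut rng = rand::rng();
for _i in 0..100 {
let x = rng.next_u32() as u16;
let y = rng.next_u32() as u16;
assert_eq!(
(GF16 { value: x } - GF16 { value: y }).value,
(GF16 { value: x } + GF16 { value: y }).value
);
}
}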
#[test]
fn mul() {
let mut rng = rand::rng();
for _i in 0..100 {
let x = rng.next_u32() as u16;
let y = rng.next_u32() as u16;
let a = (GF16 { value: x } * GF16 { value: y }).value;
let b = (ExternalGF16::new(x) * ExternalGF16::new(y)).value;
println!("{x:04x} * {y:04x} = {b:04x}");
assert_eq!(a, b);
}
}
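// `poly_reduce` is table-driven; as a cross-check, reduce the same
// inputs by schoolbook long division, XORing shifted copies of POLY
// from the top bit down. Products of two degree-<16 polynomials have
// degree at most 30, hence the 31-bit mask.
#[test]
fn poly_reduce_matches_long_division() {
fn naive_reduce(mut v: u32) -> u16 {
let mut bit = 30;
while bit >= 16 {
if v & (1 << bit) != 0 {
v ^= POLY << (bit - 16);
}
bit -= 1;
}
v as u16
}
let mut rng = rand::rng();
for _i in 0..100 {
let v = rng.next_u32() & 0x7fff_ffff;
assert_eq!(super::reduce::poly_reduce(v), naive_reduce(v));
}
}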
#[test]
fn inv() {
let mut rng = rand::rng();
for _i in 0..100 {
let x = rng.next_u32() as u16;
// Zero is not invertible; skip it, as the division test does.
if x == 0 {
continue;
}
assert_eq!(
(GF16 { value: x } * GF16 { value: x }.inv()).value,
GF16::ONE.value
);
}
}
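// The `const_*` methods always take the table-driven `unaccelerated`
// path, while the operators may dispatch to carry-less-multiply
// intrinsics at runtime; the two paths should agree.
#[test]
fn const_ops_match_operators() {
let mut rng = rand::rng();
for _i in 0..100 {
let x = GF16::new(rng.next_u32() as u16);
let y = GF16::new(rng.next_u32() as u16);
assert_eq!((x * y).value, x.const_mul(&y).value);
if y.value != 0 {
assert_eq!((x / y).value, x.const_div(&y).value);
}
}
}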
#[test]
fn div() {
let mut rng = rand::rng();
for _i in 0..100 {
let x = rng.next_u32() as u16;
let y = rng.next_u32() as u16;
if y == 0 {
continue;
}
assert_eq!(
(GF16 { value: x } / GF16 { value: y }).value,
(ExternalGF16::new(x) / ExternalGF16::new(y)).value
);
}
}
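// `parallel_mult` should agree with element-wise `*=` for any slice
// length, including the odd-length tail handled after its main loop.
#[test]
fn parallel_mult_matches_scalar() {
let mut rng = rand::rng();
for len in 0..8usize {
let a = GF16::new(rng.next_u32() as u16);
let mut fast: Vec<GF16> = (0..len).map(|_| GF16::new(rng.next_u32() as u16)).collect();
let mut slow = fast.clone();
parallel_mult(a, &mut fast);
for x in slow.iter_mut() {
*x *= a;
}
assert_eq!(fast, slow);
}
}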
#[cfg(all(target_arch = "x86_64", target_feature = "sse2", target_feature = "pclmulqdq"))]
#[test]
fn x86_barrett_reduction() {
use super::POLY;
const POLY_128: [u32; 4] = [POLY, 0, 0, 0];
const POLY_M128I: core::arch::x86_64::__m128i = unsafe { core::mem::transmute(POLY_128) };
// Carry-less quotient of x^32 by POLY, precomputed for Barrett reduction.
const POLY_BARRETT_REDUCTION: u32 = 0x1111a;
const POLYB_128: [u32; 4] = [POLY_BARRETT_REDUCTION, 0, 0, 0];
const POLYB_M128I: core::arch::x86_64::__m128i = unsafe { core::mem::transmute(POLYB_128) };
let (a, b) = (0xf5u16, 0x93u16);
let unreduced_product = unsafe {
let a = core::arch::x86_64::_mm_set_epi64x(0, a as i64);
let b = core::arch::x86_64::_mm_set_epi64x(0, b as i64);
core::arch::x86_64::_mm_clmulepi64_si128::<0>(a, b)
};
let result = unsafe {
// Barrett reduction: estimate the quotient with one carry-less
// multiply by the precomputed reciprocal, shuffle its high dword
// down (a 32-bit right shift), then XOR off quotient * POLY.
let quotient =
core::arch::x86_64::_mm_clmulepi64_si128::<0>(unreduced_product, POLYB_M128I);
let quotient_shifted = core::arch::x86_64::_mm_shuffle_epi32::<0xf9>(quotient);
let subtrahend =
core::arch::x86_64::_mm_clmulepi64_si128::<0>(POLY_M128I, quotient_shifted);
core::arch::x86_64::_mm_cvtsi128_si64(core::arch::x86_64::_mm_xor_si128(
unreduced_product,
subtrahend,
)) as u16
};
assert_eq!(
result,
super::reduce::poly_reduce(unsafe {
core::arch::x86_64::_mm_cvtsi128_si64(unreduced_product)
} as u32)
);
}
}