use alloc::vec::Vec;
use core::arch::wasm32::{
i32x4_shuffle, i64x2_add, i64x2_extmul_low_u32x4, i64x2_gt, i64x2_shl, i64x2_shuffle,
i64x2_sub, u64x2_shr, u64x2_splat, v128, v128_and, v128_andnot, v128_or, v128_xor,
};
use core::fmt::Debug;
use core::iter::{Product, Sum};
use core::mem::transmute;
use core::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign};
use p3_field::exponentiation::exp_10540996611094048183;
use p3_field::op_assign_macros::{
impl_add_assign, impl_add_base_field, impl_div_methods, impl_mul_base_field, impl_mul_methods,
impl_packed_field_div, impl_packed_value, impl_rng, impl_sub_assign, impl_sub_base_field,
impl_sum_prod_base_field, ring_sum,
};
use p3_field::{
Algebra, Field, InjectiveMonomial, PackedField, PackedFieldPow2, PackedValue,
PermutationMonomial, PrimeCharacteristicRing, PrimeField64,
};
use p3_util::reconstitute_from_base;
use rand::distr::{Distribution, StandardUniform};
use rand::{Rng, RngExt};
use crate::{Goldilocks, P};
/// Number of `Goldilocks` elements stored in one packed value.
const WIDTH: usize = 2;
/// `0 - ORDER_U64` modulo 2^64: adding this to a `u64` is the same as subtracting the field order
/// with wrap-around.
const EPSILON: u64 = Goldilocks::ORDER_U64.wrapping_neg();
// Compile-time layout checks backing the `transmute`s between the packed type and `v128`.
const _LAYOUT_INVARIANTS: () = {
// The packed array must occupy exactly one `v128`.
assert!(size_of::<[Goldilocks; WIDTH]>() == size_of::<v128>());
// Each element must be exactly as wide as a `u64` lane.
assert!(size_of::<Goldilocks>() == size_of::<u64>());
};
/// `WIDTH` (two) `Goldilocks` elements stored side by side so that arithmetic can be carried out
/// on both at once through a WebAssembly `v128`.
///
/// The type is `repr(transparent)` over `[Goldilocks; WIDTH]`; `to_vector` / `from_vector`
/// reinterpret that array as a `v128` and back.
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
#[repr(transparent)]
#[must_use]
pub struct PackedGoldilocksWasmSimd128(pub [Goldilocks; WIDTH]);
impl PackedGoldilocksWasmSimd128 {
    /// Reinterprets the packed elements as a raw `v128` (one element per 64-bit lane).
    #[inline]
    #[must_use]
    pub(crate) fn to_vector(self) -> v128 {
        // SAFETY: `Self` is `repr(transparent)` over `[Goldilocks; WIDTH]`, and
        // `_LAYOUT_INVARIANTS` checks at compile time that this array has the size of `v128`.
        // Any 16-byte bit pattern is a valid `v128`.
        unsafe { transmute::<Self, v128>(self) }
    }

    /// Reinterprets a raw `v128` as packed elements (one element per 64-bit lane).
    #[inline]
    pub(crate) fn from_vector(vector: v128) -> Self {
        // SAFETY: sizes match (see `_LAYOUT_INVARIANTS`) and `Self` is `repr(transparent)` over
        // `[Goldilocks; WIDTH]`. NOTE(review): this assumes every 64-bit pattern is a valid
        // `Goldilocks` value — confirm against the `Goldilocks` definition.
        unsafe { transmute::<v128, Self>(vector) }
    }

    /// Builds a packed value with `value` copied into every lane.
    #[inline]
    const fn broadcast(value: Goldilocks) -> Self {
        let lanes = [value; WIDTH];
        Self(lanes)
    }
}
impl From<Goldilocks> for PackedGoldilocksWasmSimd128 {
fn from(x: Goldilocks) -> Self {
Self::broadcast(x)
}
}
impl Add for PackedGoldilocksWasmSimd128 {
    type Output = Self;

    /// Lane-wise addition, delegated to the free `add` kernel operating on `v128`.
    #[inline]
    fn add(self, rhs: Self) -> Self {
        let lhs_vec = self.to_vector();
        let rhs_vec = rhs.to_vector();
        let sum_vec = add(lhs_vec, rhs_vec);
        Self::from_vector(sum_vec)
    }
}
impl Sub for PackedGoldilocksWasmSimd128 {
    type Output = Self;

    /// Lane-wise subtraction, delegated to the free `sub` kernel operating on `v128`.
    #[inline]
    fn sub(self, rhs: Self) -> Self {
        let lhs_vec = self.to_vector();
        let rhs_vec = rhs.to_vector();
        let diff_vec = sub(lhs_vec, rhs_vec);
        Self::from_vector(diff_vec)
    }
}
impl Neg for PackedGoldilocksWasmSimd128 {
    type Output = Self;

    /// Lane-wise negation, delegated to the free `neg` kernel operating on `v128`.
    #[inline]
    fn neg(self) -> Self {
        let negated_vec = neg(self.to_vector());
        Self::from_vector(negated_vec)
    }
}
impl Mul for PackedGoldilocksWasmSimd128 {
    type Output = Self;

    /// Lane-wise multiplication, delegated to the free `mul` kernel operating on `v128`.
    #[inline]
    fn mul(self, rhs: Self) -> Self {
        let lhs_vec = self.to_vector();
        let rhs_vec = rhs.to_vector();
        let product_vec = mul(lhs_vec, rhs_vec);
        Self::from_vector(product_vec)
    }
}
// Macro-generated impls for the packed type; the macros come from `p3_field::op_assign_macros`
// and their expansions are not visible in this file.
// NOTE(review): judging by their names these add the `*Assign` operators, multiplication
// helpers, `Sum` and random sampling — confirm against the macro definitions.
impl_add_assign!(PackedGoldilocksWasmSimd128);
impl_sub_assign!(PackedGoldilocksWasmSimd128);
impl_mul_methods!(PackedGoldilocksWasmSimd128);
ring_sum!(PackedGoldilocksWasmSimd128);
impl_rng!(PackedGoldilocksWasmSimd128);
impl PrimeCharacteristicRing for PackedGoldilocksWasmSimd128 {
    type PrimeSubfield = Goldilocks;

    // Each ring constant is the matching scalar constant copied into every lane.
    const ZERO: Self = Self::broadcast(Goldilocks::ZERO);
    const ONE: Self = Self::broadcast(Goldilocks::ONE);
    const TWO: Self = Self::broadcast(Goldilocks::TWO);
    const NEG_ONE: Self = Self::broadcast(Goldilocks::NEG_ONE);

    /// Lifts a scalar by copying it into every lane (via `From<Goldilocks>`).
    #[inline]
    fn from_prime_subfield(f: Self::PrimeSubfield) -> Self {
        Self::from(f)
    }

    /// Lane-wise halving, delegated to the free `halve` kernel.
    #[inline]
    fn halve(&self) -> Self {
        let halved_vec = halve(self.to_vector());
        Self::from_vector(halved_vec)
    }

    /// Lane-wise doubling, delegated to the free `double` kernel.
    #[inline]
    fn double(&self) -> Self {
        let doubled_vec = double(self.to_vector());
        Self::from_vector(doubled_vec)
    }

    /// Lane-wise squaring, delegated to the free `square` kernel.
    #[inline]
    fn square(&self) -> Self {
        let squared_vec = square(self.to_vector());
        Self::from_vector(squared_vec)
    }

    /// Returns `len` packed values built from a scalar zero vector of `len * WIDTH` elements.
    #[inline]
    fn zero_vec(len: usize) -> Vec<Self> {
        let scalar_zeros = Goldilocks::zero_vec(len * WIDTH);
        // SAFETY: `Self` is `repr(transparent)` over `[Goldilocks; WIDTH]` and the scalar vector
        // holds a multiple of `WIDTH` elements. NOTE(review): the precise requirements of
        // `reconstitute_from_base` live in `p3_util` — confirm they are all met here.
        unsafe { reconstitute_from_base(scalar_zeros) }
    }

    /// Computes the dot product independently per lane by gathering that lane from every
    /// operand and calling the scalar `Goldilocks::dot_product`.
    #[inline]
    fn dot_product<const N: usize>(lhs: &[Self; N], rhs: &[Self; N]) -> Self {
        Self::from_fn(|lane| {
            // Extracts lane `lane` of each packed operand into a scalar array.
            let gather = |packed: &[Self; N]| -> [Goldilocks; N] {
                core::array::from_fn(|i| packed[i].as_slice()[lane])
            };
            let lhs_scalars = gather(lhs);
            let rhs_scalars = gather(rhs);
            Goldilocks::dot_product(&lhs_scalars, &rhs_scalars)
        })
    }
}
// Marker impl declaring `x -> x^7` injective on this type; it has no methods of its own here.
// NOTE(review): the exact contract is defined by `p3_field::InjectiveMonomial` — not visible here.
impl InjectiveMonomial<7> for PackedGoldilocksWasmSimd128 {}
impl PermutationMonomial<7> for PackedGoldilocksWasmSimd128 {
/// Computes the 7th root by raising to the power 10540996611094048183.
///
/// For the Goldilocks order `p = 2^64 - 2^32 + 1` this exponent satisfies
/// `7 * 10540996611094048183 = 4 * (p - 1) + 1`, i.e. it is the inverse of 7 modulo `p - 1`.
/// NOTE(review): `Goldilocks::ORDER_U64` is not shown in this file — confirm it equals that `p`.
fn injective_exp_root_n(&self) -> Self {
exp_10540996611094048183(*self)
}
}
// Macro-generated impls relating the packed type to its scalar `Goldilocks`; the macros come
// from `p3_field::op_assign_macros` and their expansions are not visible in this file.
// NOTE(review): judging by their names these cover mixed packed/scalar add, sub, mul and div,
// packed division, and sum/product over scalars — confirm against the macro definitions.
impl_add_base_field!(PackedGoldilocksWasmSimd128, Goldilocks);
impl_sub_base_field!(PackedGoldilocksWasmSimd128, Goldilocks);
impl_mul_base_field!(PackedGoldilocksWasmSimd128, Goldilocks);
impl_div_methods!(PackedGoldilocksWasmSimd128, Goldilocks);
impl_packed_field_div!(PackedGoldilocksWasmSimd128);
impl_sum_prod_base_field!(PackedGoldilocksWasmSimd128, Goldilocks);
impl Algebra<Goldilocks> for PackedGoldilocksWasmSimd128 {
    // NOTE(review): the meaning of this tuning constant is defined by `p3_field::Algebra`.
    const BATCHED_LC_CHUNK: usize = 2;

    /// Dot product of packed values `a` with scalar coefficients `f`, computed per lane:
    /// lane `lane` of every `a[i]` is gathered into a scalar array and combined with `f`
    /// through the scalar `Goldilocks::dot_product`.
    #[inline]
    fn mixed_dot_product<const N: usize>(a: &[Self; N], f: &[Goldilocks; N]) -> Self {
        Self::from_fn(|lane| {
            let column: [Goldilocks; N] = core::array::from_fn(|row| a[row].as_slice()[lane]);
            Goldilocks::dot_product(&column, f)
        })
    }
}
// Macro-generated impl tying the packed type to `Goldilocks` with `WIDTH` lanes.
// NOTE(review): presumably this provides `PackedValue` (`from_fn`, `as_slice`, ...), which other
// impls in this file call — confirm against the macro definition in `p3_field`.
impl_packed_value!(PackedGoldilocksWasmSimd128, Goldilocks, WIDTH);
// SAFETY: NOTE(review): `PackedField` is an `unsafe` trait whose contract is defined in
// `p3_field` and is not visible here; this impl presumably relies on the type being
// `repr(transparent)` over `[Goldilocks; WIDTH]` — confirm.
unsafe impl PackedField for PackedGoldilocksWasmSimd128 {
type Scalar = Goldilocks;
}
/// Interleaves the 64-bit lanes of two vectors.
///
/// For `v0 = [a0, a1]` and `v1 = [b0, b1]` this returns `([a0, b0], [a1, b1])`.
#[inline]
pub fn interleave_u64(v0: v128, v1: v128) -> (v128, v128) {
    // Shuffle indices 0..=1 select from `v0`, indices 2..=3 select from `v1`.
    (
        i64x2_shuffle::<0, 2>(v0, v1),
        i64x2_shuffle::<1, 3>(v0, v1),
    )
}
// SAFETY: NOTE(review): `PackedFieldPow2` is an `unsafe` trait whose contract is defined in
// `p3_field` and is not visible here — confirm this impl satisfies it.
unsafe impl PackedFieldPow2 for PackedGoldilocksWasmSimd128 {
    /// Interleaves `self` with `other` in blocks of `block_len` elements.
    ///
    /// `block_len == 1` swaps lanes across the two vectors (see `interleave_u64`);
    /// `block_len == 2` (the full width) returns both inputs unchanged.
    ///
    /// # Panics
    ///
    /// Panics if `block_len` is neither 1 nor 2.
    fn interleave(&self, other: Self, block_len: usize) -> (Self, Self) {
        let lhs = self.to_vector();
        let rhs = other.to_vector();
        let (first, second) = match block_len {
            1 => interleave_u64(lhs, rhs),
            2 => (lhs, rhs),
            _ => panic!("unsupported block length"),
        };
        (Self::from_vector(first), Self::from_vector(second))
    }
}
/// The top bit (`0x8000_0000_0000_0000`) set in every 64-bit lane; XOR-ed in by `shift`.
const SIGN_BIT: v128 =
// SAFETY: `[u64; WIDTH]` and `v128` have the same size and every bit pattern is a valid `v128`.
unsafe { transmute::<[u64; WIDTH], v128>([0x8000_0000_0000_0000u64; WIDTH]) };
/// The field order with its top bit flipped, in every lane: the order expressed in the
/// "shifted" representation used for signed comparisons.
// SAFETY: same size and validity argument as for `SIGN_BIT`.
const SHIFTED_FIELD_ORDER: v128 = unsafe {
transmute::<[u64; WIDTH], v128>([Goldilocks::ORDER_U64 ^ 0x8000_0000_0000_0000u64; WIDTH])
};
/// `EPSILON` in every 64-bit lane.
// SAFETY: same size and validity argument as for `SIGN_BIT`.
const EPSILON_VEC: v128 = unsafe { transmute::<[u64; WIDTH], v128>([EPSILON; WIDTH]) };
/// Flips the top bit of every 64-bit lane.
///
/// This converts between the plain and the "shifted" (`_s`) representation: after the flip,
/// the signed comparison `i64x2_gt` orders lanes the way an unsigned comparison would order
/// the original values. Applying it twice restores the input.
#[inline(always)]
fn shift(x: v128) -> v128 {
v128_xor(x, SIGN_BIT)
}
/// Reduces each lane of a shifted vector below the field order.
///
/// Input and output are in the shifted representation (see `shift`). Lanes already below the
/// order are returned unchanged; the others have `EPSILON` added, which modulo 2^64 subtracts
/// the order once.
#[inline(always)]
fn canonicalize_s(x_s: v128) -> v128 {
    // All-ones in lanes that are already below the order, zero elsewhere.
    let below_order = i64x2_gt(SHIFTED_FIELD_ORDER, x_s);
    // `v128_andnot(a, b)` is `a & !b`: keep `EPSILON` only in lanes that need reducing.
    let correction = v128_andnot(EPSILON_VEC, below_order);
    i64x2_add(x_s, correction)
}
/// Adds a plain vector `x` to a shifted vector `y_s`, returning a shifted result with one
/// 2^64 wrap-around corrected.
///
/// If the 64-bit addition wraps in a lane, `2^32 - 1` is added to that lane afterwards. The
/// name indicates the caller must ensure this correction cannot itself wrap.
#[inline(always)]
fn add_no_double_overflow_64_64s_s(x: v128, y_s: v128) -> v128 {
    let wrapped_sum_s = i64x2_add(x, y_s);
    // The sum wrapped exactly when it ended up smaller than `y_s` (signed compare on shifted values).
    let overflowed = i64x2_gt(y_s, wrapped_sum_s);
    // All-ones >> 32 == 2^32 - 1 in wrapped lanes, 0 elsewhere.
    let correction = u64x2_shr(overflowed, 32);
    i64x2_add(wrapped_sum_s, correction)
}
/// Lane-wise modular addition of two plain (unshifted) vectors.
///
/// `y` is first reduced below the field order so that a single wrap-around correction in
/// `add_no_double_overflow_64_64s_s` suffices; the shifted result is then shifted back.
#[inline]
fn add(x: v128, y: v128) -> v128 {
    let y_reduced_s = canonicalize_s(shift(y));
    let sum_s = add_no_double_overflow_64_64s_s(x, y_reduced_s);
    shift(sum_s)
}
/// Lane-wise modular subtraction `x - y` of two plain (unshifted) vectors.
///
/// Both operands are moved to the shifted representation for the comparison; since the shift
/// cancels in `x_s - y_s`, the returned difference is already in the plain representation.
#[inline]
fn sub(x: v128, y: v128) -> v128 {
    let x_s = shift(x);
    let y_s = canonicalize_s(shift(y));
    // All-ones in lanes where the subtraction will borrow (y > x).
    let underflowed = i64x2_gt(y_s, x_s);
    let wrapped_diff = i64x2_sub(x_s, y_s);
    // Subtract 2^32 - 1 in borrowing lanes to undo the 2^64 wrap-around modulo the order.
    let correction = u64x2_shr(underflowed, 32);
    i64x2_sub(wrapped_diff, correction)
}
/// Lane-wise modular negation of a plain (unshifted) vector.
///
/// Computes `order - y` per lane after reducing `y` below the order; the shift cancels in the
/// subtraction, so the result is in the plain representation.
#[inline]
fn neg(y: v128) -> v128 {
    let y_reduced_s = canonicalize_s(shift(y));
    i64x2_sub(SHIFTED_FIELD_ORDER, y_reduced_s)
}
/// Halves each lane modulo `P`.
///
/// Every lane is shifted right by one bit; lanes that were odd additionally get
/// `P.div_ceil(2)` added, selected branch-free with a mask derived from the lowest bit.
#[inline(always)]
pub(crate) fn halve(input: v128) -> v128 {
    let shifted_down = u64x2_shr(input, 1);
    let low_bit = v128_and(input, u64x2_splat(1));
    // `0 - low_bit`: all-ones for odd lanes, zero for even lanes.
    let odd_mask = i64x2_sub(u64x2_splat(0), low_bit);
    let correction = v128_and(u64x2_splat(P.div_ceil(2)), odd_mask);
    i64x2_add(shifted_down, correction)
}
/// Moves the low 32-bit half of each 64-bit lane into 32-bit lanes 0 and 1.
///
/// 32-bit lanes 2 and 3 of the result are filled with a copy of lane 0.
#[inline(always)]
fn lo32(a: v128) -> v128 {
    // Indices into the 32-bit lanes of `a`: 0 and 2 are the low halves of the two 64-bit lanes.
    const FIRST_LOW: usize = 0;
    const SECOND_LOW: usize = 2;
    const FILLER: usize = 0;
    i32x4_shuffle::<FIRST_LOW, SECOND_LOW, FILLER, FILLER>(a, a)
}
/// Moves the high 32-bit half of each 64-bit lane into 32-bit lanes 0 and 1.
///
/// 32-bit lanes 2 and 3 of the result are filled with a copy of the input's 32-bit lane 0.
#[inline(always)]
fn hi32(a: v128) -> v128 {
    // Indices into the 32-bit lanes of `a`: 1 and 3 are the high halves of the two 64-bit lanes.
    const FIRST_HIGH: usize = 1;
    const SECOND_HIGH: usize = 3;
    const FILLER: usize = 0;
    i32x4_shuffle::<FIRST_HIGH, SECOND_HIGH, FILLER, FILLER>(a, a)
}
/// Multiplies the two lowest 32-bit lanes of `a_packed` and `b_packed` as unsigned integers,
/// producing two full 64-bit products.
///
/// Callers prepare the operands with `lo32` / `hi32`, which place the wanted 32-bit halves in
/// those two lowest lanes.
#[inline(always)]
fn mul_u32_lanes(a_packed: v128, b_packed: v128) -> v128 {
i64x2_extmul_low_u32x4(a_packed, b_packed)
}
/// Full 64x64 -> 128-bit unsigned multiplication per lane, returned as `(high, low)` halves.
///
/// Uses schoolbook multiplication on 32-bit halves. `EPSILON_VEC` doubles as the
/// low-32-bit mask here (`EPSILON` is `2^32 - 1` for the Goldilocks order
/// `2^64 - 2^32 + 1`; NOTE(review): confirm `Goldilocks::ORDER_U64`).
#[inline]
fn mul64_64(x: v128, y: v128) -> (v128, v128) {
    let (x_lo, x_hi) = (lo32(x), hi32(x));
    let (y_lo, y_hi) = (lo32(y), hi32(y));

    // The four 32x32 -> 64-bit partial products.
    let lo_lo = mul_u32_lanes(x_lo, y_lo);
    let lo_hi = mul_u32_lanes(x_lo, y_hi);
    let hi_lo = mul_u32_lanes(x_hi, y_lo);
    let hi_hi = mul_u32_lanes(x_hi, y_hi);

    // First middle term plus the carry out of the low product.
    // Cannot wrap: (2^32 - 1)^2 + (2^32 - 1) < 2^64.
    let mid_a = i64x2_add(hi_lo, u64x2_shr(lo_lo, 32));
    // Second middle term plus the low half of the first; same bound applies.
    let mid_b = i64x2_add(lo_hi, v128_and(mid_a, EPSILON_VEC));

    // High 64 bits: top product plus the upper halves of both middle sums.
    let high_partial = i64x2_add(hi_hi, u64x2_shr(mid_a, 32));
    let high = i64x2_add(high_partial, u64x2_shr(mid_b, 32));

    // Low 64 bits: low half of the low product, with the low half of `mid_b` above it.
    let low_bottom = v128_and(lo_lo, EPSILON_VEC);
    let low_top = i64x2_shl(v128_and(mid_b, EPSILON_VEC), 32);
    let low = v128_or(low_bottom, low_top);

    (high, low)
}
/// Adds a plain vector `y` to a shifted vector `x_s`, returning a shifted result with one
/// 2^64 wrap-around corrected.
///
/// If the 64-bit addition wraps in a lane, `2^32 - 1` is added to that lane afterwards. The
/// name indicates `y` must be small enough that this correction cannot itself wrap.
#[inline(always)]
fn add_small_64s_64_s(x_s: v128, y: v128) -> v128 {
    let wrapped_sum_s = i64x2_add(x_s, y);
    // The sum wrapped exactly when it ended up smaller than `x_s`.
    let overflowed = i64x2_gt(x_s, wrapped_sum_s);
    // All-ones >> 32 == 2^32 - 1 in wrapped lanes, 0 elsewhere.
    let correction = u64x2_shr(overflowed, 32);
    i64x2_add(wrapped_sum_s, correction)
}
/// Subtracts a plain vector `y` from a shifted vector `x_s`, returning a shifted result with
/// one 2^64 borrow corrected.
///
/// If the 64-bit subtraction borrows in a lane, `2^32 - 1` is subtracted from that lane
/// afterwards. The name indicates `y` must be small enough that this correction cannot
/// itself borrow.
#[inline(always)]
fn sub_small_64s_64_s(x_s: v128, y: v128) -> v128 {
    let wrapped_diff_s = i64x2_sub(x_s, y);
    // The difference borrowed exactly when it ended up larger than `x_s`.
    let underflowed = i64x2_gt(wrapped_diff_s, x_s);
    // All-ones >> 32 == 2^32 - 1 in borrowing lanes, 0 elsewhere.
    let correction = u64x2_shr(underflowed, 32);
    i64x2_sub(wrapped_diff_s, correction)
}
/// Reduces a 128-bit value per lane, given as `(hi, lo)` 64-bit halves, to a single 64-bit lane.
///
/// Writing `hi = hi_upper * 2^32 + hi_lower`, the result is
/// `lo - hi_upper + hi_lower * (2^32 - 1)`, with each step corrected for wrap-around by the
/// `*_small_64s_64_s` helpers. `EPSILON_VEC` is used as the low-32-bit mask
/// (NOTE(review): valid for the Goldilocks order `2^64 - 2^32 + 1` — confirm `ORDER_U64`).
#[inline]
fn reduce128(hi: v128, lo: v128) -> v128 {
    let hi_upper = u64x2_shr(hi, 32);
    let hi_lower = v128_and(hi, EPSILON_VEC);
    // hi_lower * (2^32 - 1), computed as (hi_lower << 32) - hi_lower.
    let hi_lower_scaled = i64x2_sub(i64x2_shl(hi_lower, 32), hi_lower);

    let after_sub_s = sub_small_64s_64_s(shift(lo), hi_upper);
    let after_add_s = add_small_64s_64_s(after_sub_s, hi_lower_scaled);
    shift(after_add_s)
}
/// Lane-wise modular multiplication: a full 128-bit product followed by `reduce128`.
#[inline]
fn mul(x: v128, y: v128) -> v128 {
    let wide_product = mul64_64(x, y);
    reduce128(wide_product.0, wide_product.1)
}
/// Full 64-bit squaring per lane, returned as `(high, low)` 64-bit halves of the 128-bit result.
///
/// With `x = x_hi * 2^32 + x_lo`, the square is
/// `x_hi^2 * 2^64 + 2 * x_lo * x_hi * 2^32 + x_lo^2`; the doubled cross term is folded in
/// through the 33/31-bit shifts instead of being computed twice.
#[inline]
fn square64(x: v128) -> (v128, v128) {
    let (x_lo, x_hi) = (lo32(x), hi32(x));

    let lo_sq = mul_u32_lanes(x_lo, x_lo);
    let cross = mul_u32_lanes(x_lo, x_hi);
    let hi_sq = mul_u32_lanes(x_hi, x_hi);

    // High half: x_hi^2 + ((cross + (lo_sq >> 33)) >> 31).
    let cross_plus_carry = i64x2_add(cross, u64x2_shr(lo_sq, 33));
    let high = i64x2_add(hi_sq, u64x2_shr(cross_plus_carry, 31));

    // Low half: lo_sq + (cross << 33), wrapping modulo 2^64.
    let low = i64x2_add(lo_sq, i64x2_shl(cross, 33));

    (high, low)
}
/// Lane-wise modular squaring: a full 128-bit square followed by `reduce128`.
#[inline]
fn square(x: v128) -> v128 {
    let wide_square = square64(x);
    reduce128(wide_square.0, wide_square.1)
}
/// Doubles each lane by adding the vector to itself with the modular `add` kernel.
#[inline(always)]
fn double(x: v128) -> v128 {
add(x, x)
}
// Runs the shared packed-field test suite from `p3_field_testing` against this type.
#[cfg(test)]
mod tests {
use p3_field_testing::test_packed_field;
use super::{Goldilocks, PackedGoldilocksWasmSimd128, WIDTH};
// Raw values near the top of the `u64` range.
// NOTE(review): presumably chosen to exercise non-canonical inputs at or above the field
// order — confirm how `Goldilocks::new_array` treats such values.
const SPECIAL_VALS: [Goldilocks; WIDTH] =
Goldilocks::new_array([0xFFFF_FFFF_0000_0000, 0xFFFF_FFFF_FFFF_FFFF]);
// Two raw encodings passed to the suite as "zero": 0 and `0xFFFF_FFFF_0000_0001`.
// NOTE(review): the second is presumably the field order itself, i.e. a non-canonical zero.
const ZEROS: PackedGoldilocksWasmSimd128 =
PackedGoldilocksWasmSimd128(Goldilocks::new_array([
0x0000_0000_0000_0000,
0xFFFF_FFFF_0000_0001, ]));
// Two raw encodings passed to the suite as "one": 1 and `0xFFFF_FFFF_0000_0002`.
// NOTE(review): the second is presumably the field order plus one, i.e. a non-canonical one.
const ONES: PackedGoldilocksWasmSimd128 = PackedGoldilocksWasmSimd128(Goldilocks::new_array([
0x0000_0000_0000_0001,
0xFFFF_FFFF_0000_0002, ]));
// The macro expansion is not visible here; the `super::` paths below suggest it generates a
// nested module that refers back to the constants above.
test_packed_field!(
crate::PackedGoldilocksWasmSimd128,
&[super::ZEROS],
&[super::ONES],
crate::PackedGoldilocksWasmSimd128(super::SPECIAL_VALS)
);
}