use crate::keccak_batch::keccak256_batch;
// All multi-word constants below are split into 64-bit limbs, least-significant
// limb first (suffix 0 is the low word), matching the layout of `U256`.

// Base-field prime p = 2^256 - 2^32 - 977 (the secp256k1 field modulus).
const P0: u64 = 0xFFFFFFFEFFFFFC2F;
const P1: u64 = 0xFFFFFFFFFFFFFFFF;
const P2: u64 = 0xFFFFFFFFFFFFFFFF;
const P3: u64 = 0xFFFFFFFFFFFFFFFF;
// Modulus n used by the `scalar_fn_*` routines (the secp256k1 group order).
const N0: u64 = 0xBFD25E8CD0364141;
const N1: u64 = 0xBAAEDCE6AF48A03B;
const N2: u64 = 0xFFFFFFFFFFFFFFFE;
const N3: u64 = 0xFFFFFFFFFFFFFFFF;
// (p + 1) / 4: the exponent `scalar_fp_sqrt` raises its input to.
const PINV4_0: u64 = 0xFFFFFFFFBFFFFF0C;
const PINV4_1: u64 = 0xFFFFFFFFFFFFFFFF;
const PINV4_2: u64 = 0xFFFFFFFFFFFFFFFF;
const PINV4_3: u64 = 0x3FFFFFFFFFFFFFFF;
// n - 2: the exponent `scalar_fn_inv` uses.
const N_MINUS2_0: u64 = 0xBFD25E8CD036413F;
const N_MINUS2_1: u64 = 0xBAAEDCE6AF48A03B;
const N_MINUS2_2: u64 = 0xFFFFFFFFFFFFFFFE;
const N_MINUS2_3: u64 = 0xFFFFFFFFFFFFFFFF;
// p - 2: the exponent `scalar_fp_inv` uses.
const P_MINUS2_0: u64 = 0xFFFFFFFEFFFFFC2D;
const P_MINUS2_1: u64 = 0xFFFFFFFFFFFFFFFF;
const P_MINUS2_2: u64 = 0xFFFFFFFFFFFFFFFF;
const P_MINUS2_3: u64 = 0xFFFFFFFFFFFFFFFF;
// Field element that `scalar_mul_affine` multiplies the x-coordinate by to
// obtain its second base point.
// NOTE(review): presumably a cube root of unity mod p paired with LAMBDA — confirm.
const BETA0: u64 = 0x3EC693D68E6AFA40;
const BETA1: u64 = 0x630FB68AED0A766A;
const BETA2: u64 = 0x919BB86153CBCB16;
const BETA3: u64 = 0x851695D49A83F8EF;
// Scalar used by `glv_decompose` so that k = k1 + k2 * LAMBDA (mod n).
const LAMBDA0: u64 = 0xE0CFC810B51283CE;
const LAMBDA1: u64 = 0xA880B9FC8EC739C2;
const LAMBDA2: u64 = 0x5AD9E3FD77ED9BA4;
const LAMBDA3: u64 = 0xAC9C52B33FA3CF1F;
// 256-bit multipliers: `glv_decompose` keeps the top 128 bits of k * G1 and
// k * G2 as its quotient estimates c1 and c2.
const G1_0: u64 = 0xE893209A45DBB031;
const G1_1: u64 = 0x3DAA8A1471E8CA7F;
const G1_2: u64 = 0xE86C90E49284EB15;
const G1_3: u64 = 0x3086D221A7D46BCD;
const G2_0: u64 = 0x1571B4AE8AC47F71;
const G2_1: u64 = 0x221208AC9DF506C6;
const G2_2: u64 = 0x6F547FA90ABFE4C4;
const G2_3: u64 = 0xE4437ED6010E8828;
// 128-bit constants that `glv_decompose` multiplies c1 (B1) and c2 (B2) by.
const B1_0: u64 = 0x6F547FA90ABFE4C3;
const B1_1: u64 = 0xE4437ED6010E8828;
const B2_0: u64 = 0xE86C90E49284EB15;
const B2_1: u64 = 0x3086D221A7D46BCD;
// Precomputed affine points as (x limbs, y limbs), least-significant limb first.
// `g_table_lookup` selects entry (|d| - 1) / 2 for an odd window digit d, so
// entry i serves digit 2i + 1. Entry 0 is the secp256k1 generator G.
// NOTE(review): the remaining entries are presumably 3G, 5G, ..., 15G — verify
// against a reference implementation.
const G_TABLE: [([u64; 4], [u64; 4]); 8] = [
(
[
0x59F2815B16F81798,
0x029BFCDB2DCE28D9,
0x55A06295CE870B07,
0x79BE667EF9DCBBAC,
],
[
0x9C47D08FFB10D4B8,
0xFD17B448A6855419,
0x5DA4FBFC0E1108A8,
0x483ADA7726A3C465,
],
),
(
[
0x8601F113BCE036F9,
0xB531C845836F99B0,
0x49344F85F89D5229,
0xF9308A019258C310,
],
[
0x6CB9FD7584B8E672,
0x6500A99934C2231B,
0x0FE337E62A37F356,
0x388F7B0F632DE814,
],
),
(
[
0xCBA8D569B240EFE4,
0xE88B84BDDC619AB7,
0x55B4A7250A5C5128,
0x2F8BDE4D1A072093,
],
[
0xDCA87D3AA6AC62D6,
0xF788271BAB0D6840,
0xD4DBA9DDA6C9C426,
0xD8AC222636E5E3D6,
],
),
(
[
0xE92BDDEDCAC4F9BC,
0x3D419B7E0330E39C,
0xA398F365F2EA7A0E,
0x5CBDF0646E5DB4EA,
],
[
0xA5082628087264DA,
0xA813D0B813FDE7B5,
0xA3178D6D861A54DB,
0x6AEBCA40BA255960,
],
),
(
[
0xC35F110DFC27CCBE,
0xE09796974C57E714,
0x09AD178A9F559ABD,
0xACD484E2F0C7F653,
],
[
0x05CC262AC64F9C37,
0xADD888A4375F8E0F,
0x64380971763B61E9,
0xCC338921B0A7D9FD,
],
),
(
[
0xBBEC17895DA008CB,
0x5649980BE5C17891,
0x5EF4246B70C65AAC,
0x774AE7F858A9411E,
],
[
0x301D74C9C953C61B,
0x372DB1E2DFF9D6A8,
0x0243DD56D7B7B365,
0xD984A032EB6B5E19,
],
),
(
[
0xDEEDDF8F19405AA8,
0xB075FBC6610E58CD,
0xC7D1D205C3748651,
0xF28773C2D975288B,
],
[
0x29B5CB52DB03ED81,
0x3A1A06DA521FA91F,
0x758212EB65CDAF47,
0x0AB0902E8D880A89,
],
),
(
[
0x44ADBCF8E27E080E,
0x31E5946F3C85F79E,
0x5A465AE3095FF411,
0xD7924D4F7D43EA96,
],
[
0xC504DC9FF6A26B58,
0xEA40AF2BD896D3A5,
0x83842EC228CC6DEF,
0x581E2872A86C72A6,
],
),
];
// Companion table to `G_TABLE` with the same layout and indexing; it is read by
// `phi_g_table_lookup`. Every y entry is identical to the matching `G_TABLE`
// entry, only the x coordinates differ.
// NOTE(review): presumably each x is BETA * x_G mod p (the endomorphism image of
// the `G_TABLE` point) — verify against a reference implementation.
const PHI_G_TABLE: [([u64; 4], [u64; 4]); 8] = [
(
[
0xFE51DE5EE84F50FB,
0x763BBF1E531BED98,
0xFF5E9AB39AE8D1D3,
0xC994B69768832BCB,
],
[
0x9C47D08FFB10D4B8,
0xFD17B448A6855419,
0x5DA4FBFC0E1108A8,
0x483ADA7726A3C465,
],
),
(
[
0x820D9C5DCBFF5636,
0xBFDC5797B5B3D832,
0x28FE22AADD39B3A6,
0x276096FAFA87A1A4,
],
[
0x6CB9FD7584B8E672,
0x6500A99934C2231B,
0x0FE337E62A37F356,
0x388F7B0F632DE814,
],
),
(
[
0x20CAC14EB816D5E3,
0x772F120342CDCD7C,
0xB2AC03DF28EA6865,
0x9CF8CECF391E958C,
],
[
0xDCA87D3AA6AC62D6,
0xF788271BAB0D6840,
0xD4DBA9DDA6C9C426,
0xD8AC222636E5E3D6,
],
),
(
[
0xDB0FB9A2E6E745DF,
0xB583439FED1FA1B8,
0xB76847C84C7FC583,
0x8F4FA12645B83F9D,
],
[
0xA5082628087264DA,
0xA813D0B813FDE7B5,
0xA3178D6D861A54DB,
0x6AEBCA40BA255960,
],
),
(
[
0x1BD35DC19E42F14E,
0x6A029B72C43AD40A,
0x7AED8FC57451BA21,
0xCB77771990F32193,
],
[
0x05CC262AC64F9C37,
0xADD888A4375F8E0F,
0x64380971763B61E9,
0xCC338921B0A7D9FD,
],
),
(
[
0x7E14A540E73F567D,
0x3030CC3D0EDE914D,
0x13825F52D07A8B2D,
0x36C04436903912C4,
],
[
0x301D74C9C953C61B,
0x372DB1E2DFF9D6A8,
0x0243DD56D7B7B365,
0xD984A032EB6B5E19,
],
),
(
[
0xC06732049F5FE73E,
0x1CF9856254B4A1CF,
0x3129C1B4C38F0173,
0x1C2B3405DAD246D2,
],
[
0x29B5CB52DB03ED81,
0x3A1A06DA521FA91F,
0x758212EB65CDAF47,
0x0AB0902E8D880A89,
],
),
(
[
0x80919EF8ABD03C9C,
0xC84E2FC701B96228,
0x979E5CF7A574A2A6,
0xA80EA1AA8CC2D01F,
],
[
0xC504DC9FF6A26B58,
0xEA40AF2BD896D3A5,
0x83842EC228CC6DEF,
0x581E2872A86C72A6,
],
),
];
/// Unsigned 256-bit integer stored as four 64-bit limbs, least-significant first.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct U256(pub [u64; 4]);

impl U256 {
    /// The value 0.
    const ZERO: U256 = U256([0; 4]);
    /// The value 1.
    const ONE: U256 = U256([1, 0, 0, 0]);

    /// Parses a 32-byte big-endian encoding.
    fn from_be_bytes(b: &[u8; 32]) -> Self {
        let mut limbs = [0u64; 4];
        // Walking the bytes in 8-byte groups from the end yields the limbs in
        // least-significant-first order.
        for (limb, chunk) in limbs.iter_mut().zip(b.rchunks_exact(8)) {
            let mut word = [0u8; 8];
            word.copy_from_slice(chunk);
            *limb = u64::from_be_bytes(word);
        }
        U256(limbs)
    }

    /// Returns `true` when every limb is zero.
    fn is_zero(&self) -> bool {
        self.0.iter().all(|&limb| limb == 0)
    }

    /// Returns bit `i` (bit 0 is the least significant). Panics if `i >= 256`.
    fn bit(&self, i: usize) -> bool {
        let (word, shift) = (i / 64, i % 64);
        (self.0[word] >> shift) & 1 != 0
    }

    /// Returns `true` when `self >= rhs`.
    fn ge(&self, rhs: &U256) -> bool {
        // Lexicographic comparison starting from the most significant limb.
        self.0.iter().rev().cmp(rhs.0.iter().rev()) != std::cmp::Ordering::Less
    }

    /// Adds `rhs`, returning the wrapped 256-bit sum and the carry-out (0 or 1).
    fn adc(&self, rhs: &U256) -> (U256, u64) {
        let mut out = [0u64; 4];
        let mut carry = false;
        for i in 0..4 {
            let (partial, c1) = self.0[i].overflowing_add(rhs.0[i]);
            let (sum, c2) = partial.overflowing_add(carry as u64);
            out[i] = sum;
            carry = c1 | c2;
        }
        (U256(out), carry as u64)
    }

    /// Subtracts `rhs`, returning the wrapped 256-bit difference and the
    /// borrow-out (0 or 1).
    fn sbb(&self, rhs: &U256) -> (U256, u64) {
        let mut out = [0u64; 4];
        let mut borrow = false;
        for i in 0..4 {
            let (partial, b1) = self.0[i].overflowing_sub(rhs.0[i]);
            let (diff, b2) = partial.overflowing_sub(borrow as u64);
            out[i] = diff;
            borrow = b1 | b2;
        }
        (U256(out), borrow as u64)
    }
}
// `U256` views of the limb constants declared above.
// Base-field modulus p, used by every `scalar_fp_*` routine.
const SCALAR_P: U256 = U256([P0, P1, P2, P3]);
// Modulus n, used by every `scalar_fn_*` routine.
const SCALAR_N: U256 = U256([N0, N1, N2, N3]);
// Field multiplier applied to the x-coordinate in `scalar_mul_affine`.
const SCALAR_BETA: U256 = U256([BETA0, BETA1, BETA2, BETA3]);
// Scalar multiplier used by `glv_decompose`.
const SCALAR_LAMBDA: U256 = U256([LAMBDA0, LAMBDA1, LAMBDA2, LAMBDA3]);
/// Schoolbook 256 x 256 -> 512-bit multiplication.
///
/// Returns the full product as eight 64-bit limbs, least-significant first.
fn scalar_mul_wide(a: &U256, b: &U256) -> [u64; 8] {
    let mut product = [0u64; 8];
    for (i, &ai) in a.0.iter().enumerate() {
        let mut carry = 0u64;
        for (j, &bj) in b.0.iter().enumerate() {
            // limb + limb * limb + carry always fits in 128 bits.
            let acc = product[i + j] as u128 + ai as u128 * bj as u128 + carry as u128;
            product[i + j] = acc as u64;
            carry = (acc >> 64) as u64;
        }
        // This limb has not been written yet for row `i`, so it just receives the carry.
        product[i + 4] += carry;
    }
    product
}
/// Multiplies `a` and `b` modulo the field prime p = 2^256 - K, K = 2^32 + 977.
///
/// The 512-bit product is reduced with the identity 2^256 ≡ K (mod p): the
/// high half is multiplied by K and added to the low half, and any overflow
/// above 2^256 is folded back the same way until none remains. The result is
/// fully reduced (`< p`).
fn scalar_fp_mul(a: &U256, b: &U256) -> U256 {
    const K: u128 = (1u128 << 32) + 977;
    const MASK: u128 = 0xFFFF_FFFF_FFFF_FFFF;

    let w = scalar_mul_wide(a, b);
    // First fold: low half + K * high half. Each entry is below 2^98, so the
    // carry additions below cannot overflow a u128.
    let mut acc = [
        w[0] as u128 + w[4] as u128 * K,
        w[1] as u128 + w[5] as u128 * K,
        w[2] as u128 + w[6] as u128 * K,
        w[3] as u128 + w[7] as u128 * K,
    ];

    // Normalise to 64-bit limbs and fold whatever spills past 2^256.
    // Round 1 spills less than 2^34, round 2 spills at most 1 and round 3
    // never spills, so the loop ends after at most three passes. Stopping
    // after the second pass would drop that final carry and return a value
    // that is too small by K.
    loop {
        let mut carry = 0u128;
        for limb in acc.iter_mut() {
            *limb += carry;
            carry = *limb >> 64;
            *limb &= MASK;
        }
        if carry == 0 {
            break;
        }
        let extra = carry * K;
        acc[0] += extra & MASK;
        acc[1] += extra >> 64;
    }

    let r = U256([acc[0] as u64, acc[1] as u64, acc[2] as u64, acc[3] as u64]);
    // r < 2^256 < 2p, so a single conditional subtraction completes the reduction.
    if r.ge(&SCALAR_P) {
        r.sbb(&SCALAR_P).0
    } else {
        r
    }
}
/// Squares `a` in the base field by delegating to `scalar_fp_mul(a, a)`.
fn scalar_fp_sq(a: &U256) -> U256 {
scalar_fp_mul(a, a)
}
/// Adds `a` and `b` modulo p.
///
/// The reduced value `sum - p` is chosen when the raw addition overflowed
/// 2^256 or when subtracting p did not borrow (i.e. `sum >= p`).
fn scalar_fp_add(a: &U256, b: &U256) -> U256 {
    let (sum, carry) = a.adc(b);
    let (reduced, borrow) = sum.sbb(&SCALAR_P);
    let needs_reduction = carry == 1 || borrow == 0;
    if needs_reduction {
        reduced
    } else {
        sum
    }
}
/// Subtracts `b` from `a` modulo p, adding p back when the raw subtraction borrows.
fn scalar_fp_sub(a: &U256, b: &U256) -> U256 {
    let (diff, underflow) = a.sbb(b);
    if underflow != 1 {
        return diff;
    }
    let (wrapped, _) = diff.adc(&SCALAR_P);
    wrapped
}
/// Returns `-a` modulo p, computed as `p - a`; zero maps to zero.
fn scalar_fp_neg(a: &U256) -> U256 {
    if !a.is_zero() {
        let (negated, _) = SCALAR_P.sbb(a);
        return negated;
    }
    U256::ZERO
}
/// Halves `a` modulo p.
///
/// An even value is shifted right directly. An odd value first has p added
/// (which makes it even), and the carry out of that addition becomes the top
/// bit of the shifted result.
fn scalar_fp_half(a: &U256) -> U256 {
    let is_odd = a.0[0] & 1 != 0;
    let (value, top) = if is_odd { a.adc(&SCALAR_P) } else { (*a, 0) };

    let mut halved = [0u64; 4];
    for i in 0..4 {
        // The bit shifted in at the top comes from the next limb, or from the
        // carry-out for the most significant limb.
        let above = if i < 3 { value.0[i + 1] } else { top };
        halved[i] = (value.0[i] >> 1) | (above << 63);
    }
    U256(halved)
}
/// Raises `a` to the power `exp` modulo p with right-to-left binary exponentiation.
///
/// All 256 exponent bits are scanned, least significant first.
fn scalar_fp_pow(a: &U256, exp: &U256) -> U256 {
    let (power, _) = (0..256).fold((U256::ONE, *a), |(acc, square), bit_index| {
        let acc = if exp.bit(bit_index) {
            scalar_fp_mul(&acc, &square)
        } else {
            acc
        };
        (acc, scalar_fp_sq(&square))
    });
    power
}
/// Inverts `a` modulo p by raising it to the power p - 2.
///
/// Returns `None` for zero, which has no inverse.
fn scalar_fp_inv(a: &U256) -> Option<U256> {
    if a.is_zero() {
        None
    } else {
        let p_minus_2 = U256([P_MINUS2_0, P_MINUS2_1, P_MINUS2_2, P_MINUS2_3]);
        Some(scalar_fp_pow(a, &p_minus_2))
    }
}
/// Computes a square root of `a` modulo p, if one exists.
///
/// The candidate is `a^((p + 1) / 4)`; it is accepted only when squaring it
/// gives back `a`. Zero maps to `Some(0)`.
fn scalar_fp_sqrt(a: &U256) -> Option<U256> {
    if a.is_zero() {
        return Some(U256::ZERO);
    }
    let quarter_exp = U256([PINV4_0, PINV4_1, PINV4_2, PINV4_3]);
    let candidate = scalar_fp_pow(a, &quarter_exp);
    match scalar_fp_sq(&candidate) == *a {
        true => Some(candidate),
        false => None,
    }
}
/// Subtracts `b` from `a` modulo n, adding n back when the raw subtraction borrows.
fn scalar_fn_sub(a: &U256, b: &U256) -> U256 {
    let (diff, underflow) = a.sbb(b);
    if underflow != 1 {
        return diff;
    }
    let (wrapped, _) = diff.adc(&SCALAR_N);
    wrapped
}
/// Returns `-a` modulo n, computed as `n - a`; zero maps to zero.
fn scalar_fn_neg(a: &U256) -> U256 {
    if !a.is_zero() {
        let (negated, _) = SCALAR_N.sbb(a);
        return negated;
    }
    U256::ZERO
}
/// Multiplies `a` and `b` modulo n.
///
/// The 512-bit product is reduced in three stages using 2^256 ≡ 2^256 - n
/// (mod n). The complement 2^256 - n has limbs `N_C_0`, `N_C_1` and a third
/// limb equal to 1, which is why every stage pairs two `muladd!` calls with a
/// plain `sumadd!` of the limb two positions below. `(c0, c1, c2)` is a 192-bit
/// column accumulator; `extract!` emits its low limb and shifts it down.
// `extract!` always resets `$c2`, and some of those writes are never read.
#[allow(unused_assignments)]
fn scalar_fn_mul(a: &U256, b: &U256) -> U256 {
let wide = scalar_mul_wide(a, b);
// Low two limbs of 2^256 - n.
const N_C_0: u64 = 0x402DA1732FC9BEBF;
const N_C_1: u64 = 0x4551231950B75FC4;
// (c0, c1, c2) += a * b
macro_rules! muladd {
($c0:expr, $c1:expr, $c2:expr, $a:expr, $b:expr) => {{
let t = $a as u128 * $b as u128;
let tl = t as u64;
let th = (t >> 64) as u64;
let (x, ov1) = $c0.overflowing_add(tl);
$c0 = x;
// The high half of a 64x64 product is at most 2^64 - 2, so adding the carry cannot wrap.
let th = th + ov1 as u64;
let (y, ov2) = $c1.overflowing_add(th);
$c1 = y;
$c2 += ov2 as u64;
}};
}
// (c0, c1, c2) += a
macro_rules! sumadd {
($c0:expr, $c1:expr, $c2:expr, $a:expr) => {{
let (x, ov) = $c0.overflowing_add($a);
$c0 = x;
let (y, ov2) = $c1.overflowing_add(ov as u64);
$c1 = y;
$c2 += ov2 as u64;
}};
}
// Yields the low limb of the accumulator and shifts the accumulator down by one limb.
macro_rules! extract {
($c0:expr, $c1:expr, $c2:expr) => {{
let n = $c0;
$c0 = $c1;
$c1 = $c2;
$c2 = 0;
n
}};
}
// Stage 1: fold the high four limbs (n0..n3) of the product into the low four,
// producing m0..m6.
let (n0, n1, n2, n3) = (wide[4], wide[5], wide[6], wide[7]);
let (mut c0, mut c1, mut c2): (u64, u64, u64) = (wide[0], 0, 0);
muladd!(c0, c1, c2, n0, N_C_0);
let m0 = extract!(c0, c1, c2);
sumadd!(c0, c1, c2, wide[1]);
muladd!(c0, c1, c2, n1, N_C_0);
muladd!(c0, c1, c2, n0, N_C_1);
let m1 = extract!(c0, c1, c2);
sumadd!(c0, c1, c2, wide[2]);
muladd!(c0, c1, c2, n2, N_C_0);
muladd!(c0, c1, c2, n1, N_C_1);
sumadd!(c0, c1, c2, n0);
let m2 = extract!(c0, c1, c2);
sumadd!(c0, c1, c2, wide[3]);
muladd!(c0, c1, c2, n3, N_C_0);
muladd!(c0, c1, c2, n2, N_C_1);
sumadd!(c0, c1, c2, n1);
let m3 = extract!(c0, c1, c2);
muladd!(c0, c1, c2, n3, N_C_1);
sumadd!(c0, c1, c2, n2);
let m4 = extract!(c0, c1, c2);
sumadd!(c0, c1, c2, n3);
let m5 = extract!(c0, c1, c2);
let m6 = c0 as u32;
// Stage 2: fold m4..m6 into m0..m3, producing p0..p4.
c0 = m0;
c1 = 0;
c2 = 0;
muladd!(c0, c1, c2, m4, N_C_0);
let p0 = extract!(c0, c1, c2);
sumadd!(c0, c1, c2, m1);
muladd!(c0, c1, c2, m5, N_C_0);
muladd!(c0, c1, c2, m4, N_C_1);
let p1 = extract!(c0, c1, c2);
sumadd!(c0, c1, c2, m2);
muladd!(c0, c1, c2, m6 as u64, N_C_0);
muladd!(c0, c1, c2, m5, N_C_1);
sumadd!(c0, c1, c2, m4);
let p2 = extract!(c0, c1, c2);
sumadd!(c0, c1, c2, m3);
muladd!(c0, c1, c2, m6 as u64, N_C_1);
sumadd!(c0, c1, c2, m5);
let p3 = extract!(c0, c1, c2);
let p4 = c0 as u32 + m6;
// Stage 3: fold the small top word p4 into p0..p3 with a 128-bit running sum.
let mut t = p0 as u128 + N_C_0 as u128 * p4 as u128;
let r0 = t as u64;
t >>= 64;
t += p1 as u128 + N_C_1 as u128 * p4 as u128;
let r1 = t as u64;
t >>= 64;
t += p2 as u128 + p4 as u128;
let r2 = t as u64;
t >>= 64;
t += p3 as u128;
let r3 = t as u64;
let carry = (t >> 64) as u64;
let mut result = U256([r0, r1, r2, r3]);
// Final conditional subtraction of n (also taken when the sum carried out of 256 bits).
if carry != 0 || result.ge(&SCALAR_N) {
result = result.sbb(&SCALAR_N).0;
}
result
}
/// Raises `a` to the power `exp` modulo n with right-to-left binary exponentiation.
///
/// All 256 exponent bits are scanned, least significant first.
fn scalar_fn_pow(a: &U256, exp: &U256) -> U256 {
    let (power, _) = (0..256).fold((U256::ONE, *a), |(acc, square), bit_index| {
        let acc = if exp.bit(bit_index) {
            scalar_fn_mul(&acc, &square)
        } else {
            acc
        };
        (acc, scalar_fn_mul(&square, &square))
    });
    power
}
/// Inverts `a` modulo n by raising it to the power n - 2.
///
/// Returns `None` for zero, which has no inverse.
fn scalar_fn_inv(a: &U256) -> Option<U256> {
    if a.is_zero() {
        None
    } else {
        let exponent = U256([N_MINUS2_0, N_MINUS2_1, N_MINUS2_2, N_MINUS2_3]);
        Some(scalar_fn_pow(a, &exponent))
    }
}
/// Returns the top 128 bits (bits 384..512) of the full 512-bit product `a * b`.
///
/// The value is truncated, not rounded.
fn mul256_top128(a: &U256, b: &U256) -> u128 {
    let product = scalar_mul_wide(a, b);
    let upper = u128::from(product[7]);
    let lower = u128::from(product[6]);
    (upper << 64) | lower
}
/// Curve point in Jacobian coordinates; `to_affine` maps it to
/// `(x / z^2, y / z^3)`. A zero `z` marks the point at infinity.
#[derive(Clone, Copy, Debug)]
struct JacPt {
    x: U256,
    y: U256,
    z: U256,
}

impl JacPt {
    /// The point at infinity, represented as (1, 1, 0).
    fn infinity() -> Self {
        let one = U256::ONE;
        JacPt {
            x: one,
            y: one,
            z: U256::ZERO,
        }
    }

    /// Lifts an affine point by setting `z = 1`.
    fn from_affine(x: U256, y: U256) -> Self {
        JacPt { x, y, z: U256::ONE }
    }

    /// Returns `true` when `z` is zero.
    fn is_infinity(&self) -> bool {
        self.z.is_zero()
    }

    /// Converts to affine coordinates; `None` for the point at infinity.
    fn to_affine(&self) -> Option<(U256, U256)> {
        if self.is_infinity() {
            return None;
        }
        scalar_fp_inv(&self.z).map(|z_inv| {
            let z_inv_sq = scalar_fp_sq(&z_inv);
            let z_inv_cu = scalar_fp_mul(&z_inv_sq, &z_inv);
            let affine_x = scalar_fp_mul(&self.x, &z_inv_sq);
            let affine_y = scalar_fp_mul(&self.y, &z_inv_cu);
            (affine_x, affine_y)
        })
    }
}
/// Doubles a Jacobian point; the point at infinity is returned unchanged.
///
/// With S = y^2, L = (3/2) x^2 and T = -S x the result is
/// x' = L^2 + 2T, y' = -(L (x' + T) + S^2), z' = y z.
fn pt_double(p: &JacPt) -> JacPt {
    if p.is_infinity() {
        return *p;
    }
    let y_sq = scalar_fp_sq(&p.y);
    let x_sq = scalar_fp_sq(&p.x);
    let three_x_sq = scalar_fp_add(&scalar_fp_add(&x_sq, &x_sq), &x_sq);
    let slope = scalar_fp_half(&three_x_sq);
    let neg_x_y_sq = scalar_fp_mul(&scalar_fp_neg(&y_sq), &p.x);

    let slope_sq = scalar_fp_sq(&slope);
    let x_out = scalar_fp_add(&scalar_fp_add(&slope_sq, &neg_x_y_sq), &neg_x_y_sq);

    let y_pow4 = scalar_fp_sq(&y_sq);
    let slope_term = scalar_fp_mul(&scalar_fp_add(&x_out, &neg_x_y_sq), &slope);
    let y_out = scalar_fp_neg(&scalar_fp_add(&slope_term, &y_pow4));

    let z_out = scalar_fp_mul(&p.z, &p.y);
    JacPt {
        x: x_out,
        y: y_out,
        z: z_out,
    }
}
/// Returns `2 * x1h2` in the base field. Despite the `pt_` prefix this doubles
/// a single field element, not a curve point; the addition formulas use it for
/// their `2 * X1 * H^2` term.
fn pt_double_2(x1h2: U256) -> U256 {
scalar_fp_add(&x1h2, &x1h2)
}
fn pt_add_mixed(p: &JacPt, qx: &U256, qy: &U256) -> JacPt {
if p.is_infinity() {
return JacPt::from_affine(*qx, *qy);
}
let z2 = scalar_fp_sq(&p.z);
let z3 = scalar_fp_mul(&z2, &p.z);
let u2 = scalar_fp_mul(qx, &z2);
let s2 = scalar_fp_mul(qy, &z3);
let h = scalar_fp_sub(&u2, &p.x);
let r = scalar_fp_sub(&s2, &p.y);
if h.is_zero() && r.is_zero() {
return pt_double(&JacPt::from_affine(*qx, *qy));
}
if h.is_zero() {
return JacPt::infinity();
}
let h2 = scalar_fp_sq(&h);
let h3 = scalar_fp_mul(&h, &h2);
let x1h2 = scalar_fp_mul(&p.x, &h2);
let x3 = scalar_fp_sub(&scalar_fp_sub(&scalar_fp_sq(&r), &h3), &pt_double_2(x1h2));
let y3 = scalar_fp_sub(
&scalar_fp_mul(&r, &scalar_fp_sub(&x1h2, &x3)),
&scalar_fp_mul(&p.y, &h3),
);
let z3 = scalar_fp_mul(&p.z, &h);
JacPt {
x: x3,
y: y3,
z: z3,
}
}
/// Negates a point by negating its y-coordinate; x and z are kept as they are.
fn pt_neg(p: &JacPt) -> JacPt {
    JacPt {
        y: scalar_fp_neg(&p.y),
        ..*p
    }
}
fn pt_add(p: &JacPt, q: &JacPt) -> JacPt {
if p.is_infinity() {
return *q;
}
if q.is_infinity() {
return *p;
}
let z1sq = scalar_fp_sq(&p.z);
let z2sq = scalar_fp_sq(&q.z);
let u1 = scalar_fp_mul(&p.x, &z2sq);
let u2 = scalar_fp_mul(&q.x, &z1sq);
let s1 = scalar_fp_mul(&p.y, &scalar_fp_mul(&q.z, &z2sq));
let s2 = scalar_fp_mul(&q.y, &scalar_fp_mul(&p.z, &z1sq));
let h = scalar_fp_sub(&u2, &u1);
let r = scalar_fp_sub(&s2, &s1);
if h.is_zero() {
return if r.is_zero() {
pt_double(p)
} else {
JacPt::infinity()
};
}
let h2 = scalar_fp_sq(&h);
let h3 = scalar_fp_mul(&h, &h2);
let u1h2 = scalar_fp_mul(&u1, &h2);
let x3 = scalar_fp_sub(&scalar_fp_sub(&scalar_fp_sq(&r), &h3), &pt_double_2(u1h2));
let y3 = scalar_fp_sub(
&scalar_fp_mul(&r, &scalar_fp_sub(&u1h2, &x3)),
&scalar_fp_mul(&s1, &h3),
);
let z3 = scalar_fp_mul(&scalar_fp_mul(&h, &p.z), &q.z);
JacPt {
x: x3,
y: y3,
z: z3,
}
}
/// Sign-magnitude integer with a magnitude of up to 129 bits.
///
/// `mag` holds the low 128 bits, `hi` is the flag for bit 128 and `neg` is the sign.
#[derive(Clone, Copy, Debug)]
struct S129 {
    mag: u128,
    hi: bool,
    neg: bool,
}

impl S129 {
    /// Interprets a residue modulo n as a signed value.
    ///
    /// Values at or above the half-order threshold are replaced by `n - v` and
    /// flagged negative. Only limbs 0 and 1 plus a "limb 2 is non-zero" flag
    /// are kept.
    /// NOTE(review): assumes the magnitude is below 2^129; larger magnitudes
    /// are silently truncated — confirm the callers guarantee this.
    fn from_u256_signed(v: U256) -> Self {
        let half_order = U256([
            0xDFE92F46681B20A0,
            0x5D576E7357A4501D,
            0xFFFFFFFFFFFFFFFF,
            0x7FFFFFFFFFFFFFFF,
        ]);
        let neg = v.ge(&half_order);
        let magnitude = if neg { SCALAR_N.sbb(&v).0 } else { v };
        let [low, mid, upper, _] = magnitude.0;
        S129 {
            mag: ((mid as u128) << 64) | low as u128,
            hi: upper != 0,
            neg,
        }
    }
}
/// Splits `k` into two short signed scalars `(k1, k2)` with
/// `k ≡ k1 + k2 * LAMBDA (mod n)`.
///
/// c1 and c2 are the top 128 bits of `k * G1` and `k * G2`; then
/// `k2 = c2 * B2 - c1 * B1` and `k1 = k - k2 * LAMBDA`, all modulo n, and both
/// are converted to sign-magnitude form.
fn glv_decompose(k: &U256) -> (S129, S129) {
    let widen = |v: u128| U256([v as u64, (v >> 64) as u64, 0, 0]);

    let c1 = widen(mul256_top128(k, &U256([G1_0, G1_1, G1_2, G1_3])));
    let c2 = widen(mul256_top128(k, &U256([G2_0, G2_1, G2_2, G2_3])));

    let c2_b2 = scalar_fn_mul(&c2, &U256([B2_0, B2_1, 0, 0]));
    let c1_b1 = scalar_fn_mul(&c1, &U256([B1_0, B1_1, 0, 0]));
    let k2 = scalar_fn_sub(&c2_b2, &c1_b1);

    let k2_lambda = scalar_fn_mul(&k2, &SCALAR_LAMBDA);
    let k1 = scalar_fn_sub(k, &k2_lambda);

    (S129::from_u256_signed(k1), S129::from_u256_signed(k2))
}
/// Window width, in bits, of the signed-digit recoding.
const WNAF_WIDTH: usize = 5;
/// 2^WNAF_WIDTH.
const WNAF_WINDOW: i32 = 1 << WNAF_WIDTH;
/// Mask selecting the low `WNAF_WIDTH` bits.
const WNAF_MASK: i32 = WNAF_WINDOW - 1;

/// Recodes the 129-bit value `k_hi * 2^128 + k_lo` into signed digits.
///
/// `naf[i]` is the digit with weight 2^i. Every non-zero digit is odd and lies
/// in -15..=15, and `sum(naf[i] * 2^i)` equals the input.
fn wnaf_129(k_lo: u128, k_hi: bool) -> [i8; 131] {
    let mut digits = [0i8; 131];
    let (mut low, mut high) = (k_lo, u128::from(k_hi));
    let mut pos = 0usize;
    while (low | high) != 0 {
        if low & 1 != 0 {
            let window = (low as i32) & WNAF_MASK;
            // Map the odd residue into the symmetric range around zero.
            let digit = if window > WNAF_WINDOW / 2 {
                window - WNAF_WINDOW
            } else {
                window
            };
            digits[pos] = digit as i8;
            if digit >= 0 {
                low -= digit as u128;
            } else {
                // Removing a negative digit adds its magnitude, which may carry into `high`.
                let (sum, overflowed) = low.overflowing_add(digit.unsigned_abs() as u128);
                low = sum;
                high += u128::from(overflowed);
            }
        }
        // Shift the combined (high, low) value right by one bit.
        low = (low >> 1) | (high << 127);
        high >>= 1;
        pos += 1;
    }
    digits
}
/// Builds the odd multiples `[P, 3P, 5P, ..., 15P]` of `p`.
fn build_table(p: &JacPt) -> [JacPt; 8] {
    let step = pt_double(p);
    // Slot 0 is P itself; each further slot adds 2P to its predecessor.
    let mut odd_multiples = [*p; 8];
    for i in 1..8 {
        odd_multiples[i] = pt_add(&odd_multiples[i - 1], &step);
    }
    odd_multiples
}
/// Looks up the table entry for the non-zero odd digit `d`: slot
/// `(|d| - 1) / 2`, negated when `d` is negative.
fn table_get(table: &[JacPt; 8], d: i8) -> JacPt {
    let slot = (d.unsigned_abs() as usize - 1) / 2;
    let entry = table[slot];
    match d < 0 {
        true => pt_neg(&entry),
        false => entry,
    }
}
/// Returns the affine `G_TABLE` point for the non-zero odd digit `d`.
///
/// `negate` flips the sign of the digit first; a negative effective digit
/// negates the y-coordinate.
fn g_table_lookup(d: i8, negate: bool) -> (U256, U256) {
    let signed = if negate { -d } else { d };
    let (x_limbs, y_limbs) = G_TABLE[(signed.unsigned_abs() as usize - 1) / 2];
    let y = U256(y_limbs);
    let y = if signed < 0 { scalar_fp_neg(&y) } else { y };
    (U256(x_limbs), y)
}
/// Returns the affine `PHI_G_TABLE` point for the non-zero odd digit `d`.
///
/// `negate` flips the sign of the digit first; a negative effective digit
/// negates the y-coordinate.
fn phi_g_table_lookup(d: i8, negate: bool) -> (U256, U256) {
    let signed = if negate { -d } else { d };
    let (x_limbs, y_limbs) = PHI_G_TABLE[(signed.unsigned_abs() as usize - 1) / 2];
    let y = U256(y_limbs);
    let y = if signed < 0 { scalar_fp_neg(&y) } else { y };
    (U256(x_limbs), y)
}
/// Multiplies the fixed base point by `scalar` using the precomputed tables.
///
/// The scalar is split by `glv_decompose`, both halves are recoded with
/// `wnaf_129`, and the two digit streams are processed together from the most
/// significant digit down: one doubling per position, then one mixed addition
/// per non-zero digit (`G_TABLE` for the first half, `PHI_G_TABLE` for the second).
fn scalar_mul_g(scalar: &U256) -> JacPt {
    let (k1, k2) = glv_decompose(scalar);
    let digits1 = wnaf_129(k1.mag, k1.hi);
    let digits2 = wnaf_129(k2.mag, k2.hi);

    let mut acc = JacPt::infinity();
    for (&d1, &d2) in digits1.iter().zip(digits2.iter()).rev() {
        if !acc.is_infinity() {
            acc = pt_double(&acc);
        }
        for (digit, use_phi, negate) in [(d1, false, k1.neg), (d2, true, k2.neg)] {
            if digit == 0 {
                continue;
            }
            let (qx, qy) = if use_phi {
                phi_g_table_lookup(digit, negate)
            } else {
                g_table_lookup(digit, negate)
            };
            acc = match acc.is_infinity() {
                true => JacPt::from_affine(qx, qy),
                false => pt_add_mixed(&acc, &qx, &qy),
            };
        }
    }
    acc
}
/// Multiplies the affine point `(px, py)` by `scalar`.
///
/// The scalar is split by `glv_decompose`. The first base is the input point
/// and the second is `(BETA * px, py)`; each is negated up front when its half
/// scalar is negative. Odd-multiple tables are built for both bases, and the
/// two `wnaf_129` digit streams are processed together from the most
/// significant digit down.
fn scalar_mul_affine(scalar: &U256, px: &U256, py: &U256) -> JacPt {
    let (k1, k2) = glv_decompose(scalar);

    let first_y = if k1.neg { scalar_fp_neg(py) } else { *py };
    let first_base = JacPt::from_affine(*px, first_y);

    let phi = JacPt::from_affine(scalar_fp_mul(px, &SCALAR_BETA), *py);
    let second_base = if k2.neg { pt_neg(&phi) } else { phi };

    let tables = [build_table(&first_base), build_table(&second_base)];
    let digit_streams = [wnaf_129(k1.mag, k1.hi), wnaf_129(k2.mag, k2.hi)];

    let mut acc = JacPt::infinity();
    for i in (0..131usize).rev() {
        if !acc.is_infinity() {
            acc = pt_double(&acc);
        }
        for (table, digits) in tables.iter().zip(digit_streams.iter()) {
            let digit = digits[i];
            if digit == 0 {
                continue;
            }
            let addend = table_get(table, digit);
            acc = match acc.is_infinity() {
                true => addend,
                false => pt_add(&acc, &addend),
            };
        }
    }
    acc
}
#[cfg(target_arch = "x86_64")]
pub mod x8 {
#![allow(unsafe_op_in_unsafe_fn, unused_imports)]
use super::{
JacPt, P0, P1, P2, P3, S129, SCALAR_BETA, U256, build_table, g_table_lookup, glv_decompose,
phi_g_table_lookup, pt_double, pt_neg, scalar_fn_inv, scalar_fn_mul, scalar_fn_neg,
scalar_fn_sub, scalar_fp_add, scalar_fp_inv, scalar_fp_mul, scalar_fp_neg, scalar_fp_sq,
scalar_fp_sqrt, scalar_fp_sub, scalar_mul_affine, scalar_mul_g, table_get, wnaf_129,
};
use core::arch::x86_64::*;
/// Mask selecting the low 52 bits of a 64-bit word (one radix-2^52 limb).
pub const MASK52: u64 = (1u64 << 52) - 1;
/// The field prime p split into five 52-bit limbs by [`to_52bit`].
pub const FP52_P: [u64; 5] = to_52bit([P0, P1, P2, P3]);
/// Splits a 256-bit value (four 64-bit limbs, least-significant first) into
/// five radix-2^52 limbs. Limbs 0..=3 carry 52 bits each; limb 4 carries the
/// top 48 bits.
pub const fn to_52bit(v: [u64; 4]) -> [u64; 5] {
    let limb0 = v[0] & MASK52; // bits 0..52
    let limb1 = ((v[0] >> 52) | (v[1] << 12)) & MASK52; // bits 52..104
    let limb2 = ((v[1] >> 40) | (v[2] << 24)) & MASK52; // bits 104..156
    let limb3 = ((v[2] >> 28) | (v[3] << 36)) & MASK52; // bits 156..208
    let limb4 = v[3] >> 16; // bits 208..256
    [limb0, limb1, limb2, limb3, limb4]
}
/// Packs five radix-2^52 limbs back into four 64-bit limbs (least-significant first).
///
/// The pieces are combined with bitwise OR, so limbs 0..=3 must be below 2^52
/// and limb 4 below 2^48 for the result to be meaningful.
pub const fn from_52bit(l: [u64; 5]) -> [u64; 4] {
    let [l0, l1, l2, l3, l4] = l;
    let word0 = l0 | (l1 << 52);
    let word1 = (l1 >> 12) | (l2 << 40);
    let word2 = (l2 >> 24) | (l3 << 28);
    let word3 = (l3 >> 36) | (l4 << 16);
    [word0, word1, word2, word3]
}
/// Eight 256-bit values processed in parallel, stored limb-sliced in radix 2^52.
#[derive(Clone, Copy)]
pub struct U256x8 {
/// `limbs[k]` holds 52-bit limb `k` of all eight values, one value per
/// 64-bit lane (lane `i` belongs to value `i`, as written by `load`).
pub limbs: [__m512i; 5],
}
/// Converts eight 256-bit values (four 64-bit limbs each, least-significant
/// first) into the limb-sliced radix-2^52 form; value `i` goes to lane `i`.
///
/// # Safety
/// The CPU must support the `avx512f` and `avx512ifma` target features.
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn load(vals: &[[u64; 4]; 8]) -> U256x8 {
    let split: [[u64; 5]; 8] = core::array::from_fn(|lane| to_52bit(vals[lane]));
    let mut limbs = [_mm512_setzero_si512(); 5];
    for (k, limb) in limbs.iter_mut().enumerate() {
        // `_mm512_set_epi64` takes its arguments from the highest lane down to lane 0.
        *limb = _mm512_set_epi64(
            split[7][k] as i64,
            split[6][k] as i64,
            split[5][k] as i64,
            split[4][k] as i64,
            split[3][k] as i64,
            split[2][k] as i64,
            split[1][k] as i64,
            split[0][k] as i64,
        );
    }
    U256x8 { limbs }
}
/// Converts the limb-sliced form back into eight 256-bit values (four 64-bit
/// limbs each, least-significant first); lane `i` becomes value `i`.
///
/// Packing goes through `from_52bit`, which ORs the limbs together.
///
/// # Safety
/// The CPU must support the `avx512f` and `avx512ifma` target features.
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn store(a: U256x8) -> [[u64; 4]; 8] {
    let mut per_value = [[0u64; 5]; 8];
    for (k, limb) in a.limbs.iter().enumerate() {
        let lanes: [u64; 8] = core::mem::transmute(*limb);
        for (value, &lane) in per_value.iter_mut().zip(lanes.iter()) {
            value[k] = lane;
        }
    }
    core::array::from_fn(|i| from_52bit(per_value[i]))
}
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn fp_add_x8(a: U256x8, b: U256x8) -> U256x8 {
let mask52 = _mm512_set1_epi64(MASK52 as i64);
let p = core::array::from_fn::<__m512i, 5, _>(|k| _mm512_set1_epi64(FP52_P[k] as i64));
let mut t: [__m512i; 5] =
core::array::from_fn(|k| _mm512_add_epi64(a.limbs[k], b.limbs[k]));
let carry = _mm512_srli_epi64(t[0], 52);
t[0] = _mm512_and_epi64(t[0], mask52);
t[1] = _mm512_add_epi64(t[1], carry);
let carry = _mm512_srli_epi64(t[1], 52);
t[1] = _mm512_and_epi64(t[1], mask52);
t[2] = _mm512_add_epi64(t[2], carry);
let carry = _mm512_srli_epi64(t[2], 52);
t[2] = _mm512_and_epi64(t[2], mask52);
t[3] = _mm512_add_epi64(t[3], carry);
let carry = _mm512_srli_epi64(t[3], 52);
t[3] = _mm512_and_epi64(t[3], mask52);
t[4] = _mm512_add_epi64(t[4], carry);
let (d, no_borrow) = sub_p_x8(t, &p, mask52);
let result: [__m512i; 5] =
core::array::from_fn(|k| _mm512_mask_blend_epi64(no_borrow, t[k], d[k]));
U256x8 { limbs: result }
}
/// Lane-wise subtraction modulo p, computed as `a + (-b)` via `fp_neg_x8`
/// and `fp_add_x8`.
///
/// # Safety
/// The CPU must support the `avx512f` and `avx512ifma` target features.
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn fp_sub_x8(a: U256x8, b: U256x8) -> U256x8 {
fp_add_x8(a, fp_neg_x8(b))
}
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn fp_neg_x8(a: U256x8) -> U256x8 {
let mask52 = _mm512_set1_epi64(MASK52 as i64);
let zero = _mm512_setzero_si512();
let p: [__m512i; 5] = core::array::from_fn(|k| _mm512_set1_epi64(FP52_P[k] as i64));
let (mut d, _) = sub_limbs_x8(p, a.limbs, mask52);
let zero_mask = _mm512_cmpeq_epi64_mask(a.limbs[0], zero);
for k in 0..5 {
d[k] = _mm512_mask_blend_epi64(zero_mask, d[k], zero);
}
U256x8 { limbs: d }
}
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn fp_mul_x8(a: U256x8, b: U256x8) -> U256x8 {
let [a0, a1, a2, a3, a4] = a.limbs;
let [b0, b1, b2, b3, b4] = b.limbs;
let z = _mm512_setzero_si512();
macro_rules! mlo {
($acc:expr, $a:expr, $b:expr) => {
_mm512_madd52lo_epu64($acc, $a, $b)
};
}
macro_rules! mhi {
($acc:expr, $a:expr, $b:expr) => {
_mm512_madd52hi_epu64($acc, $a, $b)
};
}
let mut t = [z; 10];
t[0] = mlo!(t[0], a0, b0);
t[1] = mhi!(t[1], a0, b0);
t[1] = mlo!(t[1], a0, b1);
t[2] = mhi!(t[2], a0, b1);
t[2] = mlo!(t[2], a0, b2);
t[3] = mhi!(t[3], a0, b2);
t[3] = mlo!(t[3], a0, b3);
t[4] = mhi!(t[4], a0, b3);
t[4] = mlo!(t[4], a0, b4);
t[5] = mhi!(t[5], a0, b4);
t[1] = mlo!(t[1], a1, b0);
t[2] = mhi!(t[2], a1, b0);
t[2] = mlo!(t[2], a1, b1);
t[3] = mhi!(t[3], a1, b1);
t[3] = mlo!(t[3], a1, b2);
t[4] = mhi!(t[4], a1, b2);
t[4] = mlo!(t[4], a1, b3);
t[5] = mhi!(t[5], a1, b3);
t[5] = mlo!(t[5], a1, b4);
t[6] = mhi!(t[6], a1, b4);
t[2] = mlo!(t[2], a2, b0);
t[3] = mhi!(t[3], a2, b0);
t[3] = mlo!(t[3], a2, b1);
t[4] = mhi!(t[4], a2, b1);
t[4] = mlo!(t[4], a2, b2);
t[5] = mhi!(t[5], a2, b2);
t[5] = mlo!(t[5], a2, b3);
t[6] = mhi!(t[6], a2, b3);
t[6] = mlo!(t[6], a2, b4);
t[7] = mhi!(t[7], a2, b4);
t[3] = mlo!(t[3], a3, b0);
t[4] = mhi!(t[4], a3, b0);
t[4] = mlo!(t[4], a3, b1);
t[5] = mhi!(t[5], a3, b1);
t[5] = mlo!(t[5], a3, b2);
t[6] = mhi!(t[6], a3, b2);
t[6] = mlo!(t[6], a3, b3);
t[7] = mhi!(t[7], a3, b3);
t[7] = mlo!(t[7], a3, b4);
t[8] = mhi!(t[8], a3, b4);
t[4] = mlo!(t[4], a4, b0);
t[5] = mhi!(t[5], a4, b0);
t[5] = mlo!(t[5], a4, b1);
t[6] = mhi!(t[6], a4, b1);
t[6] = mlo!(t[6], a4, b2);
t[7] = mhi!(t[7], a4, b2);
t[7] = mlo!(t[7], a4, b3);
t[8] = mhi!(t[8], a4, b3);
t[8] = mlo!(t[8], a4, b4);
t[9] = mhi!(t[9], a4, b4);
let mask52 = _mm512_set1_epi64(MASK52 as i64);
let cy = _mm512_srli_epi64(t[0], 52);
t[0] = _mm512_and_epi64(t[0], mask52);
t[1] = _mm512_add_epi64(t[1], cy);
let cy = _mm512_srli_epi64(t[1], 52);
t[1] = _mm512_and_epi64(t[1], mask52);
t[2] = _mm512_add_epi64(t[2], cy);
let cy = _mm512_srli_epi64(t[2], 52);
t[2] = _mm512_and_epi64(t[2], mask52);
t[3] = _mm512_add_epi64(t[3], cy);
let cy = _mm512_srli_epi64(t[3], 52);
t[3] = _mm512_and_epi64(t[3], mask52);
t[4] = _mm512_add_epi64(t[4], cy);
let cy = _mm512_srli_epi64(t[4], 52);
t[4] = _mm512_and_epi64(t[4], mask52);
t[5] = _mm512_add_epi64(t[5], cy);
let cy = _mm512_srli_epi64(t[5], 52);
t[5] = _mm512_and_epi64(t[5], mask52);
t[6] = _mm512_add_epi64(t[6], cy);
let cy = _mm512_srli_epi64(t[6], 52);
t[6] = _mm512_and_epi64(t[6], mask52);
t[7] = _mm512_add_epi64(t[7], cy);
let cy = _mm512_srli_epi64(t[7], 52);
t[7] = _mm512_and_epi64(t[7], mask52);
t[8] = _mm512_add_epi64(t[8], cy);
let cy = _mm512_srli_epi64(t[8], 52);
t[8] = _mm512_and_epi64(t[8], mask52);
t[9] = _mm512_add_epi64(t[9], cy);
let fk = _mm512_set1_epi64((16u64 * ((1u64 << 32) + 977)) as i64);
t[0] = mlo!(t[0], t[5], fk);
t[1] = mhi!(t[1], t[5], fk);
t[1] = mlo!(t[1], t[6], fk);
t[2] = mhi!(t[2], t[6], fk);
t[2] = mlo!(t[2], t[7], fk);
t[3] = mhi!(t[3], t[7], fk);
t[3] = mlo!(t[3], t[8], fk);
t[4] = mhi!(t[4], t[8], fk);
t[4] = mlo!(t[4], t[9], fk);
let hi9 = _mm512_madd52hi_epu64(_mm512_setzero_si512(), t[9], fk);
t[0] = mlo!(t[0], hi9, fk);
t[1] = mhi!(t[1], hi9, fk);
let cy = _mm512_srli_epi64(t[0], 52);
t[0] = _mm512_and_epi64(t[0], mask52);
t[1] = _mm512_add_epi64(t[1], cy);
let cy = _mm512_srli_epi64(t[1], 52);
t[1] = _mm512_and_epi64(t[1], mask52);
t[2] = _mm512_add_epi64(t[2], cy);
let cy = _mm512_srli_epi64(t[2], 52);
t[2] = _mm512_and_epi64(t[2], mask52);
t[3] = _mm512_add_epi64(t[3], cy);
let cy = _mm512_srli_epi64(t[3], 52);
t[3] = _mm512_and_epi64(t[3], mask52);
t[4] = _mm512_add_epi64(t[4], cy);
let carry4 = _mm512_srli_epi64(t[4], 48);
t[4] = _mm512_and_epi64(t[4], _mm512_set1_epi64(((1u64 << 48) - 1) as i64));
let k_splat = _mm512_set1_epi64(((1u64 << 32) + 977) as i64);
t[0] = mlo!(t[0], carry4, k_splat); let c0 = _mm512_srli_epi64(t[0], 52);
t[0] = _mm512_and_epi64(t[0], mask52);
t[1] = _mm512_add_epi64(t[1], c0);
let p: [__m512i; 5] = core::array::from_fn(|k| _mm512_set1_epi64(FP52_P[k] as i64));
let (d, no_borrow) = sub_p_x8([t[0], t[1], t[2], t[3], t[4]], &p, mask52);
let result: [__m512i; 5] =
core::array::from_fn(|k| _mm512_mask_blend_epi64(no_borrow, t[k], d[k]));
U256x8 { limbs: result }
}
/// Lane-wise squaring modulo p, implemented as `fp_mul_x8(a, a)`.
///
/// # Safety
/// The CPU must support the `avx512f` and `avx512ifma` target features.
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn fp_sq_x8(a: U256x8) -> U256x8 {
fp_mul_x8(a, a)
}
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn fp_sqrt_x8(a: U256x8) -> U256x8 {
macro_rules! sq {
($x:expr) => {
fp_sq_x8($x)
};
}
macro_rules! mul {
($a:expr, $b:expr) => {
fp_mul_x8($a, $b)
};
}
macro_rules! sq_n {
($x:expr, $n:literal) => {{
let mut t = $x;
for _ in 0..$n {
t = fp_sq_x8(t);
}
t
}};
}
let x2 = mul!(sq!(a), a); let x3 = mul!(sq!(x2), a); let x6 = mul!(sq_n!(x3, 3), x3); let x9 = mul!(sq_n!(x6, 3), x3); let x11 = mul!(sq_n!(x9, 2), x2); let x22 = mul!(sq_n!(x11, 11), x11); let x44 = mul!(sq_n!(x22, 22), x22); let x88 = mul!(sq_n!(x44, 44), x44); let x176 = mul!(sq_n!(x88, 88), x88); let x220 = mul!(sq_n!(x176, 44), x44); let x223 = mul!(sq_n!(x220, 3), x3);
let mut r = x223;
r = sq!(r); r = mul!(sq_n!(r, 22), x22); r = sq_n!(r, 4); r = mul!(sq_n!(r, 2), x2); r = sq_n!(r, 2); r
}
/// Computes `t - p` per lane by forwarding to `sub_limbs_x8`.
///
/// Returns the difference limbs and a mask whose bit `i` is set when lane `i`
/// did not borrow (i.e. `t >= p` in that lane). Despite the name, `p` can be
/// any modulus in limb form; `fn_mul_x8` passes n.
///
/// # Safety
/// The CPU must support the `avx512f` and `avx512ifma` target features.
#[target_feature(enable = "avx512f,avx512ifma")]
unsafe fn sub_p_x8(
t: [__m512i; 5],
p: &[__m512i; 5],
mask52: __m512i,
) -> ([__m512i; 5], __mmask8) {
sub_limbs_x8(t, *p, mask52)
}
#[target_feature(enable = "avx512f,avx512ifma")]
unsafe fn sub_limbs_x8(
a: [__m512i; 5],
b: [__m512i; 5],
mask52: __m512i,
) -> ([__m512i; 5], __mmask8) {
let base = _mm512_set1_epi64((1u64 << 52) as i64);
let one = _mm512_set1_epi64(1i64);
let zero = _mm512_setzero_si512();
let mut d = [zero; 5];
macro_rules! borrow_step {
($k:literal, $borrow:expr) => {{
let v = _mm512_sub_epi64(
_mm512_sub_epi64(_mm512_add_epi64(a[$k], base), b[$k]),
$borrow,
);
d[$k] = _mm512_and_epi64(v, mask52);
_mm512_sub_epi64(one, _mm512_srli_epi64(v, 52))
}};
}
let borrow = borrow_step!(0, zero);
let borrow = borrow_step!(1, borrow);
let borrow = borrow_step!(2, borrow);
let borrow = borrow_step!(3, borrow);
let borrow = borrow_step!(4, borrow);
let no_borrow_mask = _mm512_cmpeq_epi64_mask(borrow, zero);
(d, no_borrow_mask)
}
// The modulus n in radix 2^52, least-significant limb first (top limb is 48 bits).
const FN52_N: [u64; 5] = [
0x25e8cd0364141,
0xe6af48a03bbfd,
0xffffffebaaedc,
0xfffffffffffff,
0xffffffffffff,
];
// 52-bit limbs of 16 * (2^256 - n): the factor that replaces 2^260 modulo n.
const NC16_0: u64 = 0xa1732fc9bebf0; const NC16_1: u64 = 0x950b75fc4402d; const NC16_2: u64 = 0x14551231;
// 52-bit limbs of 2^256 - n: the factor that replaces 2^256 modulo n.
const NC_0: u64 = 0xda1732fc9bebf; const NC_1: u64 = 0x1950b75fc4402; const NC_2: u64 = 0x1455123;
/// Lane-wise multiplication modulo n on radix-2^52 limbs using the AVX-512
/// IFMA multiply-add instructions.
///
/// Steps: 5 x 5 schoolbook product into ten columns; two folds of the columns
/// at and above index 5 using 2^260 ≡ 16 * (2^256 - n) (mod n); one fold of the
/// bits of limb 4 above bit 48 using 2^256 ≡ 2^256 - n (mod n); then one
/// conditional subtraction of n.
///
/// The IFMA instructions read only the low 52 bits of each multiplicand.
/// NOTE(review): inputs are therefore assumed to be normalised (limbs 0..=3
/// below 2^52, limb 4 below 2^48) — confirm against callers.
///
/// # Safety
/// The CPU must support the `avx512f` and `avx512ifma` target features.
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn fn_mul_x8(a: U256x8, b: U256x8) -> U256x8 {
let [a0, a1, a2, a3, a4] = a.limbs;
let [b0, b1, b2, b3, b4] = b.limbs;
let z = _mm512_setzero_si512();
let mask52 = _mm512_set1_epi64(MASK52 as i64);
// acc + low 52 bits of the 104-bit product x * y
macro_rules! mlo {
($acc:expr, $x:expr, $y:expr) => {
_mm512_madd52lo_epu64($acc, $x, $y)
};
}
// acc + high 52 bits of the 104-bit product x * y
macro_rules! mhi {
($acc:expr, $x:expr, $y:expr) => {
_mm512_madd52hi_epu64($acc, $x, $y)
};
}
// Schoolbook product: the low half of a_i * b_j goes to column i + j, the
// high half to column i + j + 1.
let mut t = [z; 10];
t[0] = mlo!(t[0], a0, b0);
t[1] = mhi!(t[1], a0, b0);
t[1] = mlo!(t[1], a0, b1);
t[2] = mhi!(t[2], a0, b1);
t[2] = mlo!(t[2], a0, b2);
t[3] = mhi!(t[3], a0, b2);
t[3] = mlo!(t[3], a0, b3);
t[4] = mhi!(t[4], a0, b3);
t[4] = mlo!(t[4], a0, b4);
t[5] = mhi!(t[5], a0, b4);
t[1] = mlo!(t[1], a1, b0);
t[2] = mhi!(t[2], a1, b0);
t[2] = mlo!(t[2], a1, b1);
t[3] = mhi!(t[3], a1, b1);
t[3] = mlo!(t[3], a1, b2);
t[4] = mhi!(t[4], a1, b2);
t[4] = mlo!(t[4], a1, b3);
t[5] = mhi!(t[5], a1, b3);
t[5] = mlo!(t[5], a1, b4);
t[6] = mhi!(t[6], a1, b4);
t[2] = mlo!(t[2], a2, b0);
t[3] = mhi!(t[3], a2, b0);
t[3] = mlo!(t[3], a2, b1);
t[4] = mhi!(t[4], a2, b1);
t[4] = mlo!(t[4], a2, b2);
t[5] = mhi!(t[5], a2, b2);
t[5] = mlo!(t[5], a2, b3);
t[6] = mhi!(t[6], a2, b3);
t[6] = mlo!(t[6], a2, b4);
t[7] = mhi!(t[7], a2, b4);
t[3] = mlo!(t[3], a3, b0);
t[4] = mhi!(t[4], a3, b0);
t[4] = mlo!(t[4], a3, b1);
t[5] = mhi!(t[5], a3, b1);
t[5] = mlo!(t[5], a3, b2);
t[6] = mhi!(t[6], a3, b2);
t[6] = mlo!(t[6], a3, b3);
t[7] = mhi!(t[7], a3, b3);
t[7] = mlo!(t[7], a3, b4);
t[8] = mhi!(t[8], a3, b4);
t[4] = mlo!(t[4], a4, b0);
t[5] = mhi!(t[5], a4, b0);
t[5] = mlo!(t[5], a4, b1);
t[6] = mhi!(t[6], a4, b1);
t[6] = mlo!(t[6], a4, b2);
t[7] = mhi!(t[7], a4, b2);
t[7] = mlo!(t[7], a4, b3);
t[8] = mhi!(t[8], a4, b3);
t[8] = mlo!(t[8], a4, b4);
t[9] = mhi!(t[9], a4, b4);
// Moves the bits above bit 52 of column `$lo` into column `$hi`.
macro_rules! prop {
($lo:literal, $hi:literal) => {{
let cy = _mm512_srli_epi64(t[$lo], 52);
t[$lo] = _mm512_and_epi64(t[$lo], mask52);
t[$hi] = _mm512_add_epi64(t[$hi], cy);
}};
}
prop!(0, 1);
prop!(1, 2);
prop!(2, 3);
prop!(3, 4);
prop!(4, 5);
prop!(5, 6);
prop!(6, 7);
prop!(7, 8);
prop!(8, 9);
// First fold: multiply columns 5..=9 (h5..h9) by the three limbs of
// 16 * (2^256 - n) and accumulate into columns 0..=7.
let nc0 = _mm512_set1_epi64(NC16_0 as i64);
let nc1 = _mm512_set1_epi64(NC16_1 as i64);
let nc2 = _mm512_set1_epi64(NC16_2 as i64);
let (h5, h6, h7, h8, h9) = (t[5], t[6], t[7], t[8], t[9]);
// Columns 5..=7 are reused as accumulators; t[8] and t[9] are not read again.
t[5] = z;
t[6] = z;
t[7] = z;
t[0] = mlo!(t[0], h5, nc0);
t[1] = mhi!(t[1], h5, nc0);
t[1] = mlo!(t[1], h5, nc1);
t[2] = mhi!(t[2], h5, nc1);
t[2] = mlo!(t[2], h5, nc2);
t[3] = mhi!(t[3], h5, nc2);
t[1] = mlo!(t[1], h6, nc0);
t[2] = mhi!(t[2], h6, nc0);
t[2] = mlo!(t[2], h6, nc1);
t[3] = mhi!(t[3], h6, nc1);
t[3] = mlo!(t[3], h6, nc2);
t[4] = mhi!(t[4], h6, nc2);
t[2] = mlo!(t[2], h7, nc0);
t[3] = mhi!(t[3], h7, nc0);
t[3] = mlo!(t[3], h7, nc1);
t[4] = mhi!(t[4], h7, nc1);
t[4] = mlo!(t[4], h7, nc2);
t[5] = mhi!(t[5], h7, nc2);
t[3] = mlo!(t[3], h8, nc0);
t[4] = mhi!(t[4], h8, nc0);
t[4] = mlo!(t[4], h8, nc1);
t[5] = mhi!(t[5], h8, nc1);
t[5] = mlo!(t[5], h8, nc2);
t[6] = mhi!(t[6], h8, nc2);
t[4] = mlo!(t[4], h9, nc0);
t[5] = mhi!(t[5], h9, nc0);
t[5] = mlo!(t[5], h9, nc1);
t[6] = mhi!(t[6], h9, nc1);
t[6] = mlo!(t[6], h9, nc2);
t[7] = mhi!(t[7], h9, nc2);
prop!(0, 1);
prop!(1, 2);
prop!(2, 3);
prop!(3, 4);
prop!(4, 5);
prop!(5, 6);
prop!(6, 7);
// Second fold: the remaining overflow columns g5..g7 are folded the same way.
let (g5, g6, g7) = (t[5], t[6], t[7]);
t[5] = z;
t[6] = z;
t[7] = z;
t[0] = mlo!(t[0], g5, nc0);
t[1] = mhi!(t[1], g5, nc0);
t[1] = mlo!(t[1], g5, nc1);
t[2] = mhi!(t[2], g5, nc1);
t[2] = mlo!(t[2], g5, nc2);
t[3] = mhi!(t[3], g5, nc2);
t[1] = mlo!(t[1], g6, nc0);
t[2] = mhi!(t[2], g6, nc0);
t[2] = mlo!(t[2], g6, nc1);
t[3] = mhi!(t[3], g6, nc1);
t[3] = mlo!(t[3], g6, nc2);
t[4] = mhi!(t[4], g6, nc2);
t[2] = mlo!(t[2], g7, nc0);
t[3] = mhi!(t[3], g7, nc0);
t[3] = mlo!(t[3], g7, nc1);
t[4] = mhi!(t[4], g7, nc1);
// No high-half term is accumulated for g7 * nc2.
// NOTE(review): presumably g7 is small enough here that this product stays
// below 2^52 — confirm the bound.
t[4] = mlo!(t[4], g7, nc2);
prop!(0, 1);
prop!(1, 2);
prop!(2, 3);
prop!(3, 4);
// Third fold: bits of limb 4 above bit 48 have weight 2^256 and are replaced
// by q * (2^256 - n).
let nc0b = _mm512_set1_epi64(NC_0 as i64);
let nc1b = _mm512_set1_epi64(NC_1 as i64);
let nc2b = _mm512_set1_epi64(NC_2 as i64);
let mask48 = _mm512_set1_epi64(((1u64 << 48) - 1) as i64);
let q = _mm512_srli_epi64(t[4], 48);
t[4] = _mm512_and_epi64(t[4], mask48);
t[0] = mlo!(t[0], q, nc0b);
t[1] = mhi!(t[1], q, nc0b);
t[1] = mlo!(t[1], q, nc1b);
t[2] = mhi!(t[2], q, nc1b);
t[2] = mlo!(t[2], q, nc2b);
t[3] = mhi!(t[3], q, nc2b);
prop!(0, 1);
prop!(1, 2);
prop!(2, 3);
prop!(3, 4);
// Final conditional subtraction: take t - n in the lanes where it does not borrow.
let n: [__m512i; 5] = core::array::from_fn(|k| _mm512_set1_epi64(FN52_N[k] as i64));
let t5 = [t[0], t[1], t[2], t[3], t[4]];
let (d, no_borrow) = sub_p_x8(t5, &n, mask52);
let result: [__m512i; 5] =
core::array::from_fn(|k| _mm512_mask_blend_epi64(no_borrow, t5[k], d[k]));
U256x8 { limbs: result }
}
/// Squares every lane of `a` repeatedly: applies `fn_mul_x8(r, r)` exactly
/// `n` times, yielding `a^(2^n)` in the arithmetic implemented by
/// [`fn_mul_x8`].
///
/// `n == 0` performs no multiplication and returns `a` unchanged. (The loop
/// used to be seeded with one unconditional squaring, so `n == 0` returned
/// `a²`; results for `n >= 1` are unchanged.)
///
/// # Safety
/// The executing CPU must support the `avx512f` and `avx512ifma` features.
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn fn_sq_x8(a: U256x8, n: u32) -> U256x8 {
    let mut r = a;
    for _ in 0..n {
        r = fn_mul_x8(r, r);
    }
    r
}
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn fn_neg_x8(a: U256x8) -> U256x8 {
let mask52 = _mm512_set1_epi64(MASK52 as i64);
let zero = _mm512_setzero_si512();
let n: [__m512i; 5] = core::array::from_fn(|k| _mm512_set1_epi64(FN52_N[k] as i64));
let (mut d, _) = sub_limbs_x8(n, a.limbs, mask52);
let zero_mask = _mm512_cmpeq_epi64_mask(a.limbs[0], zero);
for k in 0..5 {
d[k] = _mm512_mask_blend_epi64(zero_mask, d[k], zero);
}
U256x8 { limbs: d }
}
/// Lane-wise exponentiation `a^(n-2)` built only from `fn_sq_x8` and
/// `fn_mul_x8`, using a fixed addition chain. The exponent spelled out by the
/// chain below is
/// `0xFFFFFFFFFFFFFFFF_FFFFFFFFFFFFFFFE_BAAEDCE6AF48A03B_BFD25E8CD036413F`,
/// i.e. the `N_MINUS2_*` constants, which makes this the Fermat inverse
/// modulo the group order.
///
/// How to read the chain: `xk` holds `a^(2^k - 1)` (a run of `k` one-bits).
/// `sq!(r, k)` followed by `mul!(r, xk)` appends `k` one-bits to the exponent
/// accumulated in `r`; a `sq!(r, z)` that is not followed by a `mul!` appends
/// `z` zero-bits.
///
/// # Safety
/// The executing CPU must support the `avx512f` and `avx512ifma` features.
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn fn_inv_x8(a: U256x8) -> U256x8 {
// Local shorthands so the chain below stays readable.
macro_rules! sq {
($x:expr, $n:expr) => {
fn_sq_x8($x, $n)
};
}
macro_rules! mul {
($x:expr, $y:expr) => {
fn_mul_x8($x, $y)
};
}
// Precompute runs of one-bits: x2, x3, x6, x9, x11, x22, x44, x88 ...
let x1 = a;
let x2 = mul!(sq!(x1, 1), x1); let x3 = mul!(sq!(x2, 1), x1); let x6 = mul!(sq!(x3, 3), x3); let x9 = mul!(sq!(x6, 3), x3); let x11 = mul!(sq!(x9, 2), x2); let x22 = mul!(sq!(x11, 11), x11); let x44 = mul!(sq!(x22, 22), x22); let x88 = mul!(sq!(x44, 44), x44);
// ... and x4, x8 (windows used in the tail), x17, x39, x127 = 88 + 39 ones.
let x4 = mul!(sq!(x3, 1), x1); let x8 = mul!(sq!(x4, 4), x4); let x17 = mul!(sq!(x11, 6), x6); let x39 = mul!(sq!(x22, 17), x17); let x127 = mul!(sq!(x88, 39), x39);
// Top 128 exponent bits: 127 ones followed by a single zero (0xFFFF...FFFE).
let r = x127;
let r = sq!(r, 1);
// Next 16 bits: 0xBAAE.
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 1);
let r = sq!(r, 3);
let r = mul!(r, x3); let r = sq!(r, 1);
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 1);
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 1);
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 1);
let r = sq!(r, 3);
let r = mul!(r, x3); let r = sq!(r, 1);
// Next 16 bits: 0xDCE6.
let r = sq!(r, 2);
let r = mul!(r, x2); let r = sq!(r, 1);
let r = sq!(r, 3);
let r = mul!(r, x3); let r = sq!(r, 2);
let r = sq!(r, 3);
let r = mul!(r, x3); let r = sq!(r, 2);
let r = sq!(r, 2);
let r = mul!(r, x2); let r = sq!(r, 1);
// Next 16 bits: 0xAF48.
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 1);
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 1);
let r = sq!(r, 4);
let r = mul!(r, x4); let r = sq!(r, 1);
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 2);
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 3);
// Next 48 bits: 0xA03B_BFD2_5E8C (runs straddle the 64-bit limb boundary).
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 1);
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 7);
let r = sq!(r, 3);
let r = mul!(r, x3); let r = sq!(r, 1);
let r = sq!(r, 3);
let r = mul!(r, x3); let r = sq!(r, 1);
let r = sq!(r, 8);
let r = mul!(r, x8); let r = sq!(r, 1);
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 2);
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 2);
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 1);
let r = sq!(r, 4);
let r = mul!(r, x4); let r = sq!(r, 1);
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 3);
let r = sq!(r, 2);
let r = mul!(r, x2); let r = sq!(r, 2);
// Final 32 bits: 0xD036_413F.
let r = sq!(r, 2);
let r = mul!(r, x2); let r = sq!(r, 1);
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 6);
let r = sq!(r, 2);
let r = mul!(r, x2); let r = sq!(r, 1);
let r = sq!(r, 2);
let r = mul!(r, x2); let r = sq!(r, 2);
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 5);
let r = sq!(r, 1);
let r = mul!(r, x1); let r = sq!(r, 2);
let r = sq!(r, 6);
let r = mul!(r, x6); r
}
/// Eight curve points in Jacobian coordinates, one point per SIMD lane.
///
/// The affine point of a lane is `(x / z^2, y / z^3)` (see `to_affine_x8`).
/// The addition routines in this module treat a lane whose `z` limbs are all
/// zero as the point at infinity.
#[derive(Clone, Copy)]
pub struct JacPtx8 {
/// Jacobian X coordinate of each lane.
pub x: U256x8,
/// Jacobian Y coordinate of each lane.
pub y: U256x8,
/// Jacobian Z coordinate of each lane; all-zero limbs mark infinity.
pub z: U256x8,
}
/// Doubles all eight Jacobian points of `p` lane-wise.
///
/// With `XX = X²`, `YY = Y²`, `YYYY = YY²` the result is
/// `D = 2·((X + YY)² − XX − YYYY)`, `E = 3·XX`,
/// `X3 = E² − 2·D`, `Y3 = E·(D − X3) − 8·YYYY`, `Z3 = 2·Y·Z`
/// (no curve-`a` term appears). A lane with `Z = 0` keeps `Z3 = 0`.
///
/// # Safety
/// The executing CPU must support the `avx512f` and `avx512ifma` features.
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn pt_double_x8(p: JacPtx8) -> JacPtx8 {
    let (px, py, pz) = (p.x, p.y, p.z);

    let xx = fp_sq_x8(px);
    let yy = fp_sq_x8(py);
    let yyyy = fp_sq_x8(yy);

    // D = 2·((X + YY)² − XX − YYYY)
    let x_plus_yy_sq = fp_sq_x8(fp_add_x8(px, yy));
    let half_d = fp_sub_x8(fp_sub_x8(x_plus_yy_sq, xx), yyyy);
    let d = fp_add_x8(half_d, half_d);

    // E = 3·XX, F = E²
    let two_xx = fp_add_x8(xx, xx);
    let e = fp_add_x8(two_xx, xx);
    let f = fp_sq_x8(e);

    // X3 = F − 2·D
    let two_d = fp_add_x8(d, d);
    let x_out = fp_sub_x8(f, two_d);

    // 8·YYYY by three successive doublings.
    let c2 = fp_add_x8(yyyy, yyyy);
    let c4 = fp_add_x8(c2, c2);
    let c8 = fp_add_x8(c4, c4);

    // Y3 = E·(D − X3) − 8·YYYY
    let y_out = fp_sub_x8(fp_mul_x8(e, fp_sub_x8(d, x_out)), c8);

    // Z3 = 2·Y·Z
    let yz = fp_mul_x8(py, pz);
    let z_out = fp_add_x8(yz, yz);

    JacPtx8 {
        x: x_out,
        y: y_out,
        z: z_out,
    }
}
/// Per-lane select: lane `i` of the result is taken from `on_true` when bit
/// `i` of `mask` is set and from `on_false` otherwise.
///
/// # Safety
/// The executing CPU must support the `avx512f` and `avx512ifma` features.
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn blend_x8(mask: __mmask8, on_true: U256x8, on_false: U256x8) -> U256x8 {
    // Start from the "false" limbs and overwrite the selected lanes.
    let mut limbs = on_false.limbs;
    for (dst, &src) in limbs.iter_mut().zip(on_true.limbs.iter()) {
        *dst = _mm512_mask_blend_epi64(mask, *dst, src);
    }
    U256x8 { limbs }
}
/// Per-lane select between two point vectors: applies [`blend_x8`] with the
/// same `mask` to the `x`, `y` and `z` coordinates.
///
/// # Safety
/// The executing CPU must support the `avx512f` and `avx512ifma` features.
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn blend_jacpt_x8(mask: __mmask8, on_true: JacPtx8, on_false: JacPtx8) -> JacPtx8 {
    let x = blend_x8(mask, on_true.x, on_false.x);
    let y = blend_x8(mask, on_true.y, on_false.y);
    let z = blend_x8(mask, on_true.z, on_false.z);
    JacPtx8 { x, y, z }
}
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn pt_add_mixed_x8(p: JacPtx8, qx: U256x8, qy: U256x8) -> JacPtx8 {
let JacPtx8 {
x: x1,
y: y1,
z: z1,
} = p;
let z1_or = _mm512_or_epi64(
z1.limbs[0],
_mm512_or_epi64(
z1.limbs[1],
_mm512_or_epi64(z1.limbs[2], _mm512_or_epi64(z1.limbs[3], z1.limbs[4])),
),
);
let z1_zero: __mmask8 = _mm512_cmpeq_epi64_mask(z1_or, _mm512_setzero_si512());
let z1z1 = fp_sq_x8(z1);
let u2 = fp_mul_x8(qx, z1z1);
let z1_cub = fp_mul_x8(z1, z1z1);
let s2 = fp_mul_x8(qy, z1_cub);
let h = fp_sub_x8(u2, x1);
let r_half = fp_sub_x8(s2, y1);
let r = fp_add_x8(r_half, r_half);
let hh = fp_sq_x8(h);
let i_val = fp_add_x8(fp_add_x8(hh, hh), fp_add_x8(hh, hh));
let j = fp_mul_x8(h, i_val);
let v = fp_mul_x8(x1, i_val);
let x3 = fp_sub_x8(fp_sub_x8(fp_sq_x8(r), j), fp_add_x8(v, v));
let y1j = fp_mul_x8(y1, j);
let y3 = fp_sub_x8(fp_mul_x8(r, fp_sub_x8(v, x3)), fp_add_x8(y1j, y1j));
let z3 = fp_sub_x8(fp_sub_x8(fp_sq_x8(fp_add_x8(z1, h)), z1z1), hh);
let res = JacPtx8 {
x: x3,
y: y3,
z: z3,
};
if z1_zero == 0 {
return res;
}
let one_52 = U256x8 {
limbs: [
_mm512_set1_epi64(1),
_mm512_setzero_si512(),
_mm512_setzero_si512(),
_mm512_setzero_si512(),
_mm512_setzero_si512(),
],
};
let q_jac = JacPtx8 {
x: qx,
y: qy,
z: one_52,
};
blend_jacpt_x8(z1_zero, q_jac, res)
}
/// Lane-wise addition `p + q` of two Jacobian point vectors.
///
/// Special cases, decided per lane:
/// * `p` at infinity (`z` limbs all zero) → `q`;
/// * `q` at infinity → `p`;
/// * `p == q` as curve points (`H = 0` and `S2 − S1 = 0`) → `pt_double_x8(p)`.
///   The plain addition formula collapses to `(0, 0, 0)` here, which callers
///   would read as infinity instead of `2·p`;
/// * `p == -q` → `Z3 = 0` (infinity) falls out of the formula unchanged.
///
/// NOTE(review): the equality test relies on `fp_sub_x8` returning all-zero
/// limbs for a zero result — the same kind of all-zero-limb test the
/// `z == 0` infinity checks use; confirm against `fp_sub_x8`. If it does not
/// hold, the doubling lanes are simply not detected (previous behaviour).
///
/// # Safety
/// The executing CPU must support the `avx512f` and `avx512ifma` features.
#[target_feature(enable = "avx512f,avx512ifma")]
pub unsafe fn pt_add_x8(p: JacPtx8, q: JacPtx8) -> JacPtx8 {
    let JacPtx8 {
        x: x1,
        y: y1,
        z: z1,
    } = p;
    let JacPtx8 {
        x: x2,
        y: y2,
        z: z2,
    } = q;

    // OR of the five limbs: zero iff the whole value is zero.
    let or5 = |v: U256x8| {
        _mm512_or_epi64(
            v.limbs[0],
            _mm512_or_epi64(
                v.limbs[1],
                _mm512_or_epi64(v.limbs[2], _mm512_or_epi64(v.limbs[3], v.limbs[4])),
            ),
        )
    };
    let zero512 = _mm512_setzero_si512();
    let z1_zero: __mmask8 = _mm512_cmpeq_epi64_mask(or5(z1), zero512);
    let z2_zero: __mmask8 = _mm512_cmpeq_epi64_mask(or5(z2), zero512);

    // Cross-multiply into a common frame.
    let z1z1 = fp_sq_x8(z1);
    let z2z2 = fp_sq_x8(z2);
    let u1 = fp_mul_x8(x1, z2z2);
    let u2 = fp_mul_x8(x2, z1z1);
    let s1 = fp_mul_x8(y1, fp_mul_x8(z2, z2z2));
    let s2 = fp_mul_x8(y2, fp_mul_x8(z1, z1z1));

    let h = fp_sub_x8(u2, u1);
    let r_half = fp_sub_x8(s2, s1);

    // Lanes where both operands are finite and denote the same point.
    let same_point: __mmask8 = _mm512_cmpeq_epi64_mask(or5(h), zero512)
        & _mm512_cmpeq_epi64_mask(or5(r_half), zero512)
        & !(z1_zero | z2_zero);

    let r = fp_add_x8(r_half, r_half);
    let hh = fp_sq_x8(h);
    let i_val = fp_add_x8(fp_add_x8(hh, hh), fp_add_x8(hh, hh));
    let j = fp_mul_x8(h, i_val);
    let v = fp_mul_x8(u1, i_val);
    let x3 = fp_sub_x8(fp_sub_x8(fp_sq_x8(r), j), fp_add_x8(v, v));
    let s1j = fp_mul_x8(s1, j);
    let y3 = fp_sub_x8(fp_mul_x8(r, fp_sub_x8(v, x3)), fp_add_x8(s1j, s1j));
    let z3 = fp_mul_x8(
        fp_sub_x8(fp_sub_x8(fp_sq_x8(fp_add_x8(z1, z2)), z1z1), z2z2),
        h,
    );
    let mut res = JacPtx8 {
        x: x3,
        y: y3,
        z: z3,
    };

    // Rare path: only pay for the doubling when some lane actually needs it.
    if same_point != 0 {
        res = blend_jacpt_x8(same_point, pt_double_x8(p), res);
    }

    // q = infinity → p; then p = infinity → q (wins when both are infinity).
    let after_z2 = blend_jacpt_x8(z2_zero, p, res);
    blend_jacpt_x8(z1_zero, q, after_z2)
}
/// Computes one scalar multiple per lane using table lookups from
/// `g_table_lookup` / `phi_g_table_lookup` (presumably multiples of the
/// generator and of its endomorphism image — confirm against those helpers).
///
/// Each scalar is split by `glv_decompose` into two signed halves, both are
/// recoded with `wnaf_129` into 131 digits, and the two digit streams are
/// consumed together, most significant digit first: one doubling per digit
/// position followed by up to two masked mixed additions.
///
/// The accumulator starts at `(0, 0, 0)`, so a lane that never receives an
/// addition returns `Z = 0`.
///
/// # Safety
/// The executing CPU must support the `avx512f` and `avx512ifma` features.
#[target_feature(enable = "avx512f,avx512ifma")]
pub(super) unsafe fn scalar_mul_g_x8(scalars: [U256; 8]) -> JacPtx8 {
    let blank = S129 {
        mag: 0,
        hi: false,
        neg: false,
    };
    let mut k1s = [blank; 8];
    let mut k2s = [blank; 8];
    let mut naf1 = [[0i8; 131]; 8];
    let mut naf2 = [[0i8; 131]; 8];
    for (lane, scalar) in scalars.iter().enumerate() {
        let (k1, k2) = glv_decompose(scalar);
        naf1[lane] = wnaf_129(k1.mag, k1.hi);
        naf2[lane] = wnaf_129(k2.mag, k2.hi);
        k1s[lane] = k1;
        k2s[lane] = k2;
    }

    let zero8 = U256x8 {
        limbs: [_mm512_setzero_si512(); 5],
    };
    let mut acc = JacPtx8 {
        x: zero8,
        y: zero8,
        z: zero8,
    };

    // Placeholder coordinates for lanes with a zero digit; their sum is
    // discarded by the masked blend below.
    let filler = [1u64, 0, 0, 0];

    for bit in (0..131usize).rev() {
        acc = pt_double_x8(acc);

        // First stream.
        let mut mask_a: u8 = 0;
        let mut xs_a = [filler; 8];
        let mut ys_a = [filler; 8];
        for (lane, digits) in naf1.iter().enumerate() {
            let digit = digits[bit];
            if digit == 0 {
                continue;
            }
            mask_a |= 1 << lane;
            let (tx, ty) = g_table_lookup(digit, k1s[lane].neg);
            xs_a[lane] = tx.0;
            ys_a[lane] = ty.0;
        }
        if mask_a != 0 {
            let summed = pt_add_mixed_x8(acc, load(&xs_a), load(&ys_a));
            acc = blend_jacpt_x8(mask_a, summed, acc);
        }

        // Second stream.
        let mut mask_b: u8 = 0;
        let mut xs_b = [filler; 8];
        let mut ys_b = [filler; 8];
        for (lane, digits) in naf2.iter().enumerate() {
            let digit = digits[bit];
            if digit == 0 {
                continue;
            }
            mask_b |= 1 << lane;
            let (tx, ty) = phi_g_table_lookup(digit, k2s[lane].neg);
            xs_b[lane] = tx.0;
            ys_b[lane] = ty.0;
        }
        if mask_b != 0 {
            let summed = pt_add_mixed_x8(acc, load(&xs_b), load(&ys_b));
            acc = blend_jacpt_x8(mask_b, summed, acc);
        }
    }
    acc
}
/// Computes `scalars[i] · (px_arr[i], py_arr[i])` for eight independent
/// affine base points, returning the results in Jacobian form.
///
/// Per lane the scalar is split by `glv_decompose`; two tables are built with
/// `build_table`, one from the base point and one from the point with
/// `x · SCALAR_BETA`, each negated up front when the corresponding half is
/// flagged `neg`. Both 131-digit `wnaf_129` streams are then consumed
/// together, most significant digit first, with one doubling per position
/// and up to two masked full Jacobian additions.
///
/// The accumulator starts at `(0, 0, 0)`, so a lane that never receives an
/// addition returns `Z = 0`.
///
/// # Safety
/// The executing CPU must support the `avx512f` and `avx512ifma` features.
#[target_feature(enable = "avx512f,avx512ifma")]
pub(super) unsafe fn scalar_mul_affine_x8(
    scalars: [U256; 8],
    px_arr: [U256; 8],
    py_arr: [U256; 8],
) -> JacPtx8 {
    let mut naf1 = [[0i8; 131]; 8];
    let mut naf2 = [[0i8; 131]; 8];
    let inf = JacPt::infinity();
    let mut tables1 = [[inf; 8]; 8];
    let mut tables2 = [[inf; 8]; 8];

    for lane in 0..8 {
        let (k1, k2) = glv_decompose(&scalars[lane]);
        naf1[lane] = wnaf_129(k1.mag, k1.hi);
        naf2[lane] = wnaf_129(k2.mag, k2.hi);

        let (px, py) = (px_arr[lane], py_arr[lane]);

        // Sign of the first half is folded into the base point's y.
        let y1 = if k1.neg { scalar_fp_neg(&py) } else { py };
        let base1 = JacPt::from_affine(px, y1);

        // Second base: x scaled by SCALAR_BETA, negated if required.
        let phi = JacPt::from_affine(scalar_fp_mul(&px, &SCALAR_BETA), py);
        let base2 = if k2.neg { pt_neg(&phi) } else { phi };

        tables1[lane] = build_table(&base1);
        tables2[lane] = build_table(&base2);
    }

    let zero8 = U256x8 {
        limbs: [_mm512_setzero_si512(); 5],
    };
    let mut acc = JacPtx8 {
        x: zero8,
        y: zero8,
        z: zero8,
    };

    // Placeholder coordinates for lanes with a zero digit; their sum is
    // discarded by the masked blend below.
    let filler = [1u64, 0, 0, 0];

    for bit in (0..131usize).rev() {
        acc = pt_double_x8(acc);

        // First stream.
        let mut mask_a: u8 = 0;
        let mut xs_a = [filler; 8];
        let mut ys_a = [filler; 8];
        let mut zs_a = [filler; 8];
        for lane in 0..8 {
            let digit = naf1[lane][bit];
            if digit == 0 {
                continue;
            }
            mask_a |= 1 << lane;
            let entry = table_get(&tables1[lane], digit);
            xs_a[lane] = entry.x.0;
            ys_a[lane] = entry.y.0;
            zs_a[lane] = entry.z.0;
        }
        if mask_a != 0 {
            let addend = JacPtx8 {
                x: load(&xs_a),
                y: load(&ys_a),
                z: load(&zs_a),
            };
            let summed = pt_add_x8(acc, addend);
            acc = blend_jacpt_x8(mask_a, summed, acc);
        }

        // Second stream.
        let mut mask_b: u8 = 0;
        let mut xs_b = [filler; 8];
        let mut ys_b = [filler; 8];
        let mut zs_b = [filler; 8];
        for lane in 0..8 {
            let digit = naf2[lane][bit];
            if digit == 0 {
                continue;
            }
            mask_b |= 1 << lane;
            let entry = table_get(&tables2[lane], digit);
            xs_b[lane] = entry.x.0;
            ys_b[lane] = entry.y.0;
            zs_b[lane] = entry.z.0;
        }
        if mask_b != 0 {
            let addend = JacPtx8 {
                x: load(&xs_b),
                y: load(&ys_b),
                z: load(&zs_b),
            };
            let summed = pt_add_x8(acc, addend);
            acc = blend_jacpt_x8(mask_b, summed, acc);
        }
    }
    acc
}
/// Converts eight Jacobian points to affine `(x, y)` pairs with a single
/// field inversion shared by all lanes (prefix-product batch inversion).
///
/// # Panics
/// Panics if the product of all eight `Z` values has no inverse, which is the
/// case when any lane has `Z = 0` (point at infinity). Callers must patch
/// such lanes beforehand.
///
/// # Safety
/// The executing CPU must support the `avx512f` and `avx512ifma` features.
#[target_feature(enable = "avx512f,avx512ifma")]
pub(super) unsafe fn to_affine_x8(p: JacPtx8) -> [(U256, U256); 8] {
    let z_limbs = store(p.z);
    let zs: [U256; 8] = core::array::from_fn(|i| U256(z_limbs[i]));

    // prefix[i] = z0 · z1 · … · zi
    let mut prefix = [U256([1, 0, 0, 0]); 8];
    let mut product = zs[0];
    prefix[0] = product;
    for i in 1..8 {
        product = scalar_fp_mul(&product, &zs[i]);
        prefix[i] = product;
    }

    let inv_all = scalar_fp_inv(&prefix[7]).expect("to_affine_x8: Z=0 (infinity) lane");

    // Peel factors off from the top: at step i, `running` = (z0·…·zi)⁻¹.
    let mut inv_z = [U256([1, 0, 0, 0]); 8];
    let mut running = inv_all;
    let mut i = 7;
    while i >= 1 {
        inv_z[i] = scalar_fp_mul(&running, &prefix[i - 1]);
        running = scalar_fp_mul(&running, &zs[i]);
        i -= 1;
    }
    inv_z[0] = running;

    let x_limbs = store(p.x);
    let y_limbs = store(p.y);
    core::array::from_fn(|lane| {
        let z_inv = inv_z[lane];
        let z_inv2 = scalar_fp_sq(&z_inv);
        let z_inv3 = scalar_fp_mul(&z_inv2, &z_inv);
        let x_aff = scalar_fp_mul(&U256(x_limbs[lane]), &z_inv2);
        let y_aff = scalar_fp_mul(&U256(y_limbs[lane]), &z_inv3);
        (x_aff, y_aff)
    })
}
/// Tests for the 8-lane AVX-512 routines. Every test that executes SIMD code
/// returns early (passes vacuously) when the host CPU lacks AVX-512F/IFMA.
///
/// The generator / 3·G limb constants and the Jacobian→affine comparison were
/// previously copy-pasted into each test; they now live in shared constants
/// and helpers. Test names, inputs and assertion messages are unchanged.
#[cfg(test)]
mod tests_x8 {
    use super::*;

    /// Little-endian 64-bit limbs of the generator's affine x coordinate.
    const GX: [u64; 4] = [
        0x59F2815B16F81798,
        0x029BFCDB2DCE28D9,
        0x55A06295CE870B07,
        0x79BE667EF9DCBBAC,
    ];
    /// Little-endian 64-bit limbs of the generator's affine y coordinate.
    const GY: [u64; 4] = [
        0x9C47D08FFB10D4B8,
        0xFD17B448A6855419,
        0x5DA4FBFC0E1108A8,
        0x483ADA7726A3C465,
    ];
    /// Affine x coordinate of 3·G (checked against G + 2G below).
    const THREE_GX: [u64; 4] = [
        0x8601F113BCE036F9,
        0xB531C845836F99B0,
        0x49344F85F89D5229,
        0xF9308A019258C310,
    ];
    /// Affine y coordinate of 3·G.
    const THREE_GY: [u64; 4] = [
        0x6CB9FD7584B8E672,
        0x6500A99934C2231B,
        0x0FE337E62A37F356,
        0x388F7B0F632DE814,
    ];
    /// The value 1.
    const ONE: [u64; 4] = [1, 0, 0, 0];

    fn check_avx512() -> bool {
        is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512ifma")
    }

    fn gx() -> U256 {
        U256(GX)
    }

    fn gy() -> U256 {
        U256(GY)
    }

    /// Broadcasts one 256-bit value into all eight lanes.
    /// Only call after `check_avx512()` returned true.
    fn splat(v: [u64; 4]) -> U256x8 {
        unsafe { load(&[v; 8]) }
    }

    /// The generator with Z = 1 in every lane.
    fn g_x8() -> JacPtx8 {
        JacPtx8 {
            x: splat(GX),
            y: splat(GY),
            z: splat(ONE),
        }
    }

    /// Scalar Jacobian → affine conversion used as the reference.
    fn jac_to_affine(rx: [u64; 4], ry: [u64; 4], rz: [u64; 4]) -> (U256, U256) {
        let z = U256(rz);
        let zi = scalar_fp_inv(&z).expect("z should be nonzero");
        let z2 = scalar_fp_sq(&zi);
        let z3 = scalar_fp_mul(&z2, &zi);
        (scalar_fp_mul(&U256(rx), &z2), scalar_fp_mul(&U256(ry), &z3))
    }

    /// Asserts that every lane of `p` is the affine point `(want_x, want_y)`.
    fn assert_affine_lanes(p: JacPtx8, want_x: U256, want_y: U256, what: &str) {
        let (xs, ys, zs) = unsafe { (store(p.x), store(p.y), store(p.z)) };
        for lane in 0..8 {
            let (ax, ay) = jac_to_affine(xs[lane], ys[lane], zs[lane]);
            assert_eq!(ax, want_x, "{what} x mismatch at lane {lane}");
            assert_eq!(ay, want_y, "{what} y mismatch at lane {lane}");
        }
    }

    #[test]
    fn test_52bit_roundtrip() {
        let v = [P0, P1, P2, P3];
        assert_eq!(from_52bit(to_52bit(v)), v, "round-trip failed");
        let v2 = [
            0x0102030405060708u64,
            0xFEDCBA9876543210,
            0xDEADBEEFCAFEBABE,
            0x0000000000000001,
        ];
        assert_eq!(from_52bit(to_52bit(v2)), v2, "round-trip failed for v2");
    }

    #[test]
    fn test_fp_mul_x8_matches_scalar() {
        if !check_avx512() {
            return;
        }
        // Second operand is p − 1.
        let b_val = [
            0xFFFFFFFEFFFFFC2Eu64,
            0xFFFFFFFFFFFFFFFF,
            0xFFFFFFFFFFFFFFFF,
            0xFFFFFFFFFFFFFFFF,
        ];
        let expected = scalar_fp_mul(&U256(GX), &U256(b_val));
        let c8 = unsafe { fp_mul_x8(splat(GX), splat(b_val)) };
        let got = unsafe { store(c8) };
        for lane in 0..8 {
            let got_u256 = U256(got[lane]);
            assert_eq!(
                got_u256, expected,
                "fp_mul_x8 lane {lane} mismatch: got {:?} expected {:?}",
                got[lane], expected.0
            );
        }
    }

    #[test]
    fn test_fp_add_neg_x8() {
        if !check_avx512() {
            return;
        }
        let a8 = splat(GX);
        let neg_a8 = unsafe { fp_neg_x8(a8) };
        let sum8 = unsafe { fp_add_x8(a8, neg_a8) };
        let got = unsafe { store(sum8) };
        for lane in 0..8 {
            assert_eq!(got[lane], [0u64; 4], "a + (-a) != 0 at lane {lane}");
        }
    }

    #[test]
    fn test_fp_sq_x8_matches_scalar() {
        if !check_avx512() {
            return;
        }
        let a_u256 = U256(GX);
        let expected = scalar_fp_mul(&a_u256, &a_u256);
        let c8 = unsafe { fp_sq_x8(splat(GX)) };
        let got = unsafe { store(c8) };
        for lane in 0..8 {
            assert_eq!(U256(got[lane]), expected, "fp_sq_x8 lane {lane} mismatch");
        }
    }

    #[test]
    fn test_fp_sqrt_x8_matches_scalar() {
        if !check_avx512() {
            return;
        }
        // Gx³ + 7 is the curve equation's right-hand side at G, hence a square.
        let rhs_val = {
            let g_x = gx();
            let gx3 = scalar_fp_mul(&scalar_fp_sq(&g_x), &g_x);
            let b7 = U256([7, 0, 0, 0]);
            scalar_fp_add(&gx3, &b7)
        };
        let expected = scalar_fp_sqrt(&rhs_val).expect("Gx^3+7 must be a QR");
        let r8 = unsafe { fp_sqrt_x8(splat(rhs_val.0)) };
        let sq8 = unsafe { fp_sq_x8(r8) };
        let got_sq = unsafe { store(sq8) };
        let got_r = unsafe { store(r8) };
        for lane in 0..8 {
            assert_eq!(
                U256(got_sq[lane]),
                rhs_val,
                "fp_sqrt_x8: r^2 != a at lane {lane}"
            );
            let r_u256 = U256(got_r[lane]);
            assert!(
                r_u256 == expected || scalar_fp_neg(&r_u256) == expected,
                "fp_sqrt_x8 lane {lane}: result is neither root"
            );
        }
    }

    #[test]
    fn test_fp_mul_by_one_x8() {
        if !check_avx512() {
            return;
        }
        let c8 = unsafe { fp_mul_x8(splat(GX), splat(ONE)) };
        let got = unsafe { store(c8) };
        for lane in 0..8 {
            assert_eq!(got[lane], GX, "a * 1 != a at lane {lane}");
        }
    }

    #[test]
    fn test_fn_mul_x8_matches_scalar() {
        if !check_avx512() {
            return;
        }
        // Second operand is n − 2.
        let b_val = [
            0xBFD25E8CD036413Fu64,
            0xBAAEDCE6AF48A03B,
            0xFFFFFFFFFFFFFFFE,
            0xFFFFFFFFFFFFFFFF,
        ];
        let expected = scalar_fn_mul(&U256(GX), &U256(b_val));
        let c8 = unsafe { fn_mul_x8(splat(GX), splat(b_val)) };
        let got = unsafe { store(c8) };
        for lane in 0..8 {
            assert_eq!(
                U256(got[lane]),
                expected,
                "fn_mul_x8 lane {lane}: got {:?} expected {:?}",
                got[lane],
                expected.0
            );
        }
    }

    #[test]
    fn test_fn_mul_by_one_x8() {
        if !check_avx512() {
            return;
        }
        let c8 = unsafe { fn_mul_x8(splat(GX), splat(ONE)) };
        let got = unsafe { store(c8) };
        for lane in 0..8 {
            assert_eq!(got[lane], GX, "fn_mul_x8: a * 1 != a at lane {lane}");
        }
    }

    #[test]
    fn test_pt_double_x8_matches_scalar() {
        if !check_avx512() {
            return;
        }
        let g_jac = JacPt::from_affine(gx(), gy());
        let two_g_jac = pt_double(&g_jac);
        let (ref_x, ref_y) = two_g_jac.to_affine().expect("2G should not be infinity");

        let d8 = unsafe { pt_double_x8(g_x8()) };
        assert_affine_lanes(d8, ref_x, ref_y, "pt_double_x8");

        // Double again, now with Z != 1 on input.
        let d2_8 = unsafe { pt_double_x8(d8) };
        let four_g_jac = pt_double(&two_g_jac);
        let (ref4_x, ref4_y) = four_g_jac.to_affine().expect("4G should not be infinity");
        assert_affine_lanes(d2_8, ref4_x, ref4_y, "pt_double_x8 4G");
    }

    #[test]
    fn test_fn_neg_x8_matches_scalar() {
        if !check_avx512() {
            return;
        }
        let test_vals: [[u64; 4]; 5] = [
            [0, 0, 0, 0],
            [1, 0, 0, 0],
            [7, 0, 0, 0],
            [
                0xBFD25E8CD036413E,
                0xBAAEDCE6AF48A03B,
                0xFFFFFFFFFFFFFFFE,
                0xFFFFFFFFFFFFFFFF,
            ],
            [
                0xDEADBEEFCAFEBABE,
                0x1234567890ABCDEF,
                0xFEDCBA0987654321,
                0x0102030405060708,
            ],
        ];
        for a_val in test_vals {
            let expected = scalar_fn_neg(&U256(a_val));
            let neg8 = unsafe { fn_neg_x8(splat(a_val)) };
            let got = unsafe { store(neg8) };
            for lane in 0..8 {
                assert_eq!(
                    U256(got[lane]),
                    expected,
                    "fn_neg_x8 lane {lane}: got {:?} expected {:?}",
                    got[lane],
                    expected.0
                );
            }
            // Negation must be an involution on non-zero inputs.
            if a_val != [0, 0, 0, 0] {
                let neg_neg8 = unsafe { fn_neg_x8(neg8) };
                let got2 = unsafe { store(neg_neg8) };
                for lane in 0..8 {
                    assert_eq!(
                        got2[lane], a_val,
                        "fn_neg_x8: neg(neg(a)) != a at lane {lane}"
                    );
                }
            }
        }
    }

    #[test]
    fn test_fn_inv_x8_matches_scalar() {
        if !check_avx512() {
            return;
        }
        let test_vals: [[u64; 4]; 4] = [
            GX,
            [7, 0, 0, 0],
            [
                0xBFD25E8CD036413D,
                0xBAAEDCE6AF48A03B,
                0xFFFFFFFFFFFFFFFE,
                0xFFFFFFFFFFFFFFFF,
            ],
            [
                0xDEADBEEFCAFEBABE,
                0x1234567890ABCDEF,
                0xFEDCBA0987654321,
                0x0102030405060708,
            ],
        ];
        for a_val in test_vals {
            let expected = scalar_fn_inv(&U256(a_val)).expect("scalar_fn_inv failed");
            let a8 = splat(a_val);
            let inv8 = unsafe { fn_inv_x8(a8) };
            let got = unsafe { store(inv8) };
            for lane in 0..8 {
                assert_eq!(
                    U256(got[lane]),
                    expected,
                    "fn_inv_x8 lane {lane}: got {:?} expected {:?}",
                    got[lane],
                    expected.0
                );
            }
            let prod8 = unsafe { fn_mul_x8(a8, inv8) };
            let prod = unsafe { store(prod8) };
            for lane in 0..8 {
                assert_eq!(
                    prod[lane],
                    [1, 0, 0, 0],
                    "fn_inv_x8: a * inv(a) != 1 at lane {lane}"
                );
            }
        }
    }

    #[test]
    fn test_pt_add_mixed_x8_matches_scalar() {
        if !check_avx512() {
            return;
        }
        // G + 3G must equal 4G.
        let g_jac = JacPt::from_affine(gx(), gy());
        let two_g = pt_double(&g_jac);
        let four_g = pt_double(&two_g);
        let (ref_x, ref_y) = four_g.to_affine().expect("4G not infinity");

        let qx8 = splat(THREE_GX);
        let qy8 = splat(THREE_GY);
        let r8 = unsafe { pt_add_mixed_x8(g_x8(), qx8, qy8) };
        assert_affine_lanes(r8, ref_x, ref_y, "pt_add_mixed_x8");

        // infinity + 3G must equal 3G.
        let zero = splat([0u64; 4]);
        let zero_pt = JacPtx8 {
            x: zero,
            y: zero,
            z: zero,
        };
        let r_inf = unsafe { pt_add_mixed_x8(zero_pt, qx8, qy8) };
        assert_affine_lanes(
            r_inf,
            U256(THREE_GX),
            U256(THREE_GY),
            "pt_add_mixed_x8 infinity",
        );
    }

    #[test]
    fn test_pt_add_x8_matches_scalar() {
        if !check_avx512() {
            return;
        }
        // G + 2G must equal 3G.
        let g8 = g_x8();
        let two_g_x8 = unsafe { pt_double_x8(g8) };
        let r8 = unsafe { pt_add_x8(g8, two_g_x8) };
        assert_affine_lanes(r8, U256(THREE_GX), U256(THREE_GY), "pt_add_x8");
    }

    #[test]
    fn test_scalar_mul_g_x8_matches_scalar() {
        if !check_avx512() {
            return;
        }
        // Lane i multiplies by i + 1.
        let scalars: [U256; 8] = core::array::from_fn(|i| U256([(i + 1) as u64, 0, 0, 0]));
        let refs: [(U256, U256); 8] = core::array::from_fn(|i| {
            scalar_mul_g(&scalars[i])
                .to_affine()
                .expect("k·G not infinity")
        });
        let r8 = unsafe { scalar_mul_g_x8(scalars) };
        let aff = unsafe { to_affine_x8(r8) };
        for lane in 0..8 {
            assert_eq!(
                aff[lane].0, refs[lane].0,
                "scalar_mul_g_x8 x mismatch at lane {lane}"
            );
            assert_eq!(
                aff[lane].1, refs[lane].1,
                "scalar_mul_g_x8 y mismatch at lane {lane}"
            );
        }
    }

    #[test]
    fn test_scalar_mul_affine_x8_matches_scalar() {
        if !check_avx512() {
            return;
        }
        // Even lanes use G as base point, odd lanes use 3G; lane i multiplies by i + 2.
        let base_xs: [U256; 8] =
            core::array::from_fn(|i| if i % 2 == 0 { gx() } else { U256(THREE_GX) });
        let base_ys: [U256; 8] =
            core::array::from_fn(|i| if i % 2 == 0 { gy() } else { U256(THREE_GY) });
        let scalars: [U256; 8] = core::array::from_fn(|i| U256([(i + 2) as u64, 0, 0, 0]));
        let refs: [(U256, U256); 8] = core::array::from_fn(|i| {
            scalar_mul_affine(&scalars[i], &base_xs[i], &base_ys[i])
                .to_affine()
                .expect("k·P not infinity")
        });
        let r8 = unsafe { scalar_mul_affine_x8(scalars, base_xs, base_ys) };
        let aff = unsafe { to_affine_x8(r8) };
        for lane in 0..8 {
            assert_eq!(
                aff[lane].0, refs[lane].0,
                "scalar_mul_affine_x8 x mismatch at lane {lane}"
            );
            assert_eq!(
                aff[lane].1, refs[lane].1,
                "scalar_mul_affine_x8 y mismatch at lane {lane}"
            );
        }
    }
}
}
/// Scalar ECDSA public-key recovery for a single signature, returning the
/// last 20 bytes of `keccak256(x || y)` of the recovered key.
///
/// * `hash`, `r`, `s` — 32-byte big-endian values.
/// * `v` — only its lowest bit is used; it selects the parity of the
///   reconstructed point's y coordinate.
///
/// Returns `[0u8; 20]` when `r` or `s` is zero or not below `SCALAR_N`, when
/// `r³ + 7` has no square root, or when an intermediate or final point is at
/// infinity.
fn recover_one(hash: &[u8; 32], r: &[u8; 32], s: &[u8; 32], v: u8) -> [u8; 20] {
    const FAILED: [u8; 20] = [0u8; 20];

    let r_u = U256::from_be_bytes(r);
    let s_u = U256::from_be_bytes(s);
    let z = U256::from_be_bytes(hash);

    let r_ok = !r_u.is_zero() && !r_u.ge(&SCALAR_N);
    let s_ok = !s_u.is_zero() && !s_u.ge(&SCALAR_N);
    if !(r_ok && s_ok) {
        return FAILED;
    }

    // Rebuild R = (r, y) from the curve equation y² = x³ + 7.
    let r_x = r_u;
    let x_cubed = scalar_fp_mul(&scalar_fp_sq(&r_x), &r_x);
    let rhs = scalar_fp_add(&x_cubed, &U256([7, 0, 0, 0]));
    let Some(root) = scalar_fp_sqrt(&rhs) else {
        return FAILED;
    };
    // Choose the root whose parity matches the low bit of `v`.
    let r_y = if (root.0[0] & 1) == (v & 1) as u64 {
        root
    } else {
        scalar_fp_neg(&root)
    };

    let Some(r_inv) = scalar_fn_inv(&r_u) else {
        return FAILED;
    };

    // Q = u1·G + u2·R with u1 = −z/r and u2 = s/r.
    let u1 = scalar_fn_neg(&scalar_fn_mul(&z, &r_inv));
    let u2 = scalar_fn_mul(&s_u, &r_inv);
    let p1 = scalar_mul_g(&u1);
    let p2 = scalar_mul_affine(&u2, &r_x, &r_y);

    let q = if p1.is_infinity() {
        p2
    } else if p2.is_infinity() {
        p1
    } else {
        let Some((p1x, p1y)) = p1.to_affine() else {
            return FAILED;
        };
        pt_add_mixed(&p2, &p1x, &p1y)
    };

    let Some((qx, qy)) = q.to_affine() else {
        return FAILED;
    };

    // Address = low 20 bytes of keccak256(x || y).
    let mut pubkey_xy = [0u8; 64];
    let (x_half, y_half) = pubkey_xy.split_at_mut(32);
    x_half.copy_from_slice(&qx.to_be_bytes());
    y_half.copy_from_slice(&qy.to_be_bytes());
    let digest = crate::keccak_scalar::keccak256(&pubkey_xy);

    let mut addr = [0u8; 20];
    addr.copy_from_slice(&digest[12..]);
    addr
}
impl U256 {
    /// Serialises the value as 32 big-endian bytes: the most significant
    /// limb (`self.0[3]`) comes first, each limb itself big-endian.
    fn to_be_bytes(self) -> [u8; 32] {
        let mut out = [0u8; 32];
        // Walk the limbs from most to least significant, 8 bytes each.
        for (chunk, limb) in out.chunks_exact_mut(8).zip(self.0.iter().rev()) {
            chunk.copy_from_slice(&limb.to_be_bytes());
        }
        out
    }
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f,avx512bw,avx512dq,avx512ifma")]
unsafe fn recover_addresses_avx512(
hashes: [&[u8; 32]; 8],
rs: [&[u8; 32]; 8],
ss: [&[u8; 32]; 8],
vs: [u8; 8],
) -> [[u8; 20]; 8] {
use x8::{
blend_x8, fn_inv_x8, fn_mul_x8, fn_neg_x8, fp_add_x8, fp_mul_x8, fp_neg_x8, fp_sq_x8,
fp_sqrt_x8, load, pt_add_x8, scalar_mul_affine_x8, scalar_mul_g_x8, store, to_affine_x8,
};
let r_vals: [U256; 8] = core::array::from_fn(|i| U256::from_be_bytes(rs[i]));
let s_vals: [U256; 8] = core::array::from_fn(|i| U256::from_be_bytes(ss[i]));
let z_vals: [U256; 8] = core::array::from_fn(|i| U256::from_be_bytes(hashes[i]));
let mut valid = [true; 8];
for lane in 0..8 {
if r_vals[lane].is_zero()
|| r_vals[lane].ge(&SCALAR_N)
|| s_vals[lane].is_zero()
|| s_vals[lane].ge(&SCALAR_N)
{
valid[lane] = false;
}
}
let rs_raw: [[u64; 4]; 8] = core::array::from_fn(|i| r_vals[i].0);
let ss_raw: [[u64; 4]; 8] = core::array::from_fn(|i| s_vals[i].0);
let zs_raw: [[u64; 4]; 8] = core::array::from_fn(|i| z_vals[i].0);
let r_x8 = unsafe { load(&rs_raw) };
let s_x8 = unsafe { load(&ss_raw) };
let z_x8 = unsafe { load(&zs_raw) };
let r_cu_x8 = unsafe { fp_mul_x8(fp_sq_x8(r_x8), r_x8) };
let seven_x8 = unsafe { load(&[[7u64, 0, 0, 0]; 8]) };
let rhs_x8 = unsafe { fp_add_x8(r_cu_x8, seven_x8) };
let r_y_raw_x8 = unsafe { fp_sqrt_x8(rhs_x8) };
{
let check_x8 = unsafe { fp_sq_x8(r_y_raw_x8) };
let check_raw = unsafe { store(check_x8) };
let rhs_raw = unsafe { store(rhs_x8) };
for lane in 0..8 {
if valid[lane] && check_raw[lane] != rhs_raw[lane] {
valid[lane] = false;
}
}
}
let r_y_neg_x8 = unsafe { fp_neg_x8(r_y_raw_x8) };
let parity_mask: u8 = {
let r_y_stored = unsafe { store(r_y_raw_x8) };
let mut mask: u8 = 0;
for lane in 0..8 {
let y_parity = (r_y_stored[lane][0] & 1) as u8;
if y_parity != (vs[lane] & 1) {
mask |= 1u8 << lane;
}
}
mask
};
let r_y_x8 = unsafe { blend_x8(parity_mask, r_y_neg_x8, r_y_raw_x8) };
let r_inv_x8 = unsafe { fn_inv_x8(r_x8) };
let u1_x8 = unsafe { fn_neg_x8(fn_mul_x8(z_x8, r_inv_x8)) };
let u2_x8 = unsafe { fn_mul_x8(s_x8, r_inv_x8) };
let u1_raw = unsafe { store(u1_x8) };
let u2_raw = unsafe { store(u2_x8) };
let ry_raw = unsafe { store(r_y_x8) };
let dummy_x = U256([
0x59F2815B16F81798,
0x029BFCDB2DCE28D9,
0x55A06295CE870B07,
0x79BE667EF9DCBBAC,
]);
let dummy_y = U256([
0x9C47D08FFB10D4B8,
0xFD17B448A6855419,
0x5DA4FBFC0E1108A8,
0x483ADA7726A3C465,
]);
let mut u1s: [U256; 8] = core::array::from_fn(|i| U256(u1_raw[i]));
let mut u2s: [U256; 8] = core::array::from_fn(|i| U256(u2_raw[i]));
let mut rx_arr: [U256; 8] = r_vals;
let mut ry_arr: [U256; 8] = core::array::from_fn(|i| U256(ry_raw[i]));
for lane in 0..8 {
if !valid[lane] {
u1s[lane] = U256([1, 0, 0, 0]); u2s[lane] = U256([2, 0, 0, 0]);
rx_arr[lane] = dummy_x;
ry_arr[lane] = dummy_y;
}
}
let mut q_x8 = unsafe {
let p1 = scalar_mul_g_x8(u1s);
let p2 = scalar_mul_affine_x8(u2s, rx_arr, ry_arr);
pt_add_x8(p1, p2)
};
{
let zs = unsafe { store(q_x8.z) };
let mut any_zero = false;
for lane in 0..8 {
if zs[lane] == [0u64; 4] {
valid[lane] = false;
any_zero = true;
}
}
if any_zero {
let mut zfix = [[1u64, 0, 0, 0]; 8];
for lane in 0..8 {
if zs[lane] != [0u64; 4] {
zfix[lane] = zs[lane];
}
}
q_x8.z = unsafe { load(&zfix) };
}
}
let aff = unsafe { to_affine_x8(q_x8) };
let mut pubkey_xys: [[u8; 64]; 8] = [[0u8; 64]; 8];
for lane in 0..8 {
if valid[lane] {
pubkey_xys[lane][0..32].copy_from_slice(&aff[lane].0.to_be_bytes());
pubkey_xys[lane][32..64].copy_from_slice(&aff[lane].1.to_be_bytes());
}
}
let inputs: [&[u8]; 8] = std::array::from_fn(|i| pubkey_xys[i].as_slice());
let hashed = keccak256_batch(inputs);
let mut out = [[0u8; 20]; 8];
for lane in 0..8 {
if valid[lane] {
out[lane].copy_from_slice(&hashed[lane][12..]);
}
}
out
}
/// Recovers eight addresses from eight `(hash, r, s, v)` signature tuples.
///
/// On x86_64 hosts where AVX-512 F, BW, DQ and IFMA are all detected at run
/// time, the batch is processed by `recover_addresses_avx512`; otherwise each
/// lane is handled independently by `recover_one`. A lane that cannot be
/// recovered yields `[0u8; 20]`.
pub fn recover_addresses_batch(
    hashes: [&[u8; 32]; 8],
    rs: [&[u8; 32]; 8],
    ss: [&[u8; 32]; 8],
    vs: [u8; 8],
) -> [[u8; 20]; 8] {
    #[cfg(target_arch = "x86_64")]
    {
        let simd_available = is_x86_feature_detected!("avx512f")
            && is_x86_feature_detected!("avx512bw")
            && is_x86_feature_detected!("avx512dq")
            && is_x86_feature_detected!("avx512ifma");
        if simd_available {
            // SAFETY: every target feature required by
            // `recover_addresses_avx512` was detected just above.
            return unsafe { recover_addresses_avx512(hashes, rs, ss, vs) };
        }
    }

    // Portable fallback: one scalar recovery per lane.
    std::array::from_fn(|lane| recover_one(hashes[lane], rs[lane], ss[lane], vs[lane]))
}
/// End-to-end tests for `recover_one` and `recover_addresses_batch`, all
/// driven by a single known signature vector.
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes a hex string (optionally `0x`-prefixed) into exactly 32 bytes;
    /// panics on malformed input or wrong length.
    fn unhex32(s: &str) -> [u8; 32] {
        let digits = s.strip_prefix("0x").unwrap_or(s);
        let mut bytes: Vec<u8> = Vec::with_capacity(digits.len() / 2);
        let mut pos = 0;
        while pos < digits.len() {
            bytes.push(u8::from_str_radix(&digits[pos..pos + 2], 16).unwrap());
            pos += 2;
        }
        bytes.try_into().unwrap()
    }

    /// Lower-case hex encoding of a 20-byte address, without prefix.
    fn hex20(b: &[u8; 20]) -> String {
        let mut text = String::with_capacity(40);
        for byte in b {
            text.push_str(&format!("{byte:02x}"));
        }
        text
    }

    const HASH_HEX: &str = "18c547e4f7b0f325ad1e56f57e26c745b09a3e503d86e00e5255ff7f715d3d1c";
    const R_HEX: &str = "73b1693892219d736caba55bdb67216e485557ea6b6af75f37096c9aa6a5a75f";
    const S_HEX: &str = "eeb940b1d03b21e36b0e47e79769f095fe2ab855bd91e3a38756b7d75a9c4549";
    /// Recovery value 28 expressed as a 0/1 parity bit.
    const V_VAL: u8 = 28 - 27;
    const ADDR_HEX: &str = "a94f5374fce5edbc8e2a8697c15331677e6ebf0b";

    /// Decoded `(hash, r, s)` of the known vector.
    fn known_vector() -> ([u8; 32], [u8; 32], [u8; 32]) {
        (unhex32(HASH_HEX), unhex32(R_HEX), unhex32(S_HEX))
    }

    #[test]
    fn test_recover_one_known_vector() {
        let (hash, r, s) = known_vector();
        let addr = recover_one(&hash, &r, &s, V_VAL);
        assert_eq!(hex20(&addr), ADDR_HEX);
    }

    #[test]
    fn test_batch_all_same_vector() {
        let (hash, r, s) = known_vector();
        let addrs = recover_addresses_batch([&hash; 8], [&r; 8], [&s; 8], [V_VAL; 8]);
        for addr in &addrs {
            assert_eq!(hex20(addr), ADDR_HEX, "batch lane mismatch");
        }
    }

    #[test]
    fn test_batch_scalar_agreement() {
        let (hash, r, s) = known_vector();
        let hashes = [&hash; 8];
        let rs = [&r; 8];
        let ss = [&s; 8];
        let vs = [V_VAL; 8];
        let batch = recover_addresses_batch(hashes, rs, ss, vs);
        let scalar: [[u8; 20]; 8] =
            std::array::from_fn(|i| recover_one(hashes[i], rs[i], ss[i], vs[i]));
        assert_eq!(batch, scalar, "batch/scalar mismatch");
    }

    #[test]
    fn test_batch_invalid_lane_zeroed() {
        let (hash, r, s) = known_vector();
        // r = 0 is out of range, so lane 3 must be rejected.
        let bad_r = [0u8; 32];
        let mut rs_arr = [&r; 8];
        rs_arr[3] = &bad_r;
        let addrs = recover_addresses_batch([&hash; 8], rs_arr, [&s; 8], [V_VAL; 8]);
        assert_eq!(addrs[3], [0u8; 20], "invalid lane should be zeroed");
        for i in [0, 1, 2, 4, 5, 6, 7] {
            assert_eq!(hex20(&addrs[i]), ADDR_HEX, "valid lane {i} mismatch");
        }
    }
}