use core::cmp::Ordering;
use crate::hash::noxtls_sha256;
use noxtls_core::{Error, Result};
#[cfg(all(not(feature = "std"), target_has_atomic = "8"))]
use spin::Once;
#[cfg(feature = "std")]
use std::sync::OnceLock;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct U256([u32; 8]);
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct AffinePoint {
x: U256,
y: U256,
infinity: bool,
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct JacobianPoint {
x: U256,
y: U256,
z: U256,
infinity: bool,
}
const ZERO: U256 = U256([0; 8]);
const ONE: U256 = U256([1, 0, 0, 0, 0, 0, 0, 0]);
const P: U256 = U256([
0xffff_ffff,
0xffff_ffff,
0xffff_ffff,
0x0000_0000,
0x0000_0000,
0x0000_0000,
0x0000_0001,
0xffff_ffff,
]);
const N: U256 = U256([
0xfc63_2551,
0xf3b9_cac2,
0xa717_9e84,
0xbce6_faad,
0xffff_ffff,
0xffff_ffff,
0x0000_0000,
0xffff_ffff,
]);
const A: U256 = U256([
0xffff_fffc,
0xffff_ffff,
0xffff_ffff,
0x0000_0000,
0x0000_0000,
0x0000_0000,
0x0000_0001,
0xffff_ffff,
]);
const B: U256 = U256([
0x27d2_604b,
0x3bce_3c3e,
0xcc53_b0f6,
0x651d_06b0,
0x7698_86bc,
0xb3eb_bd55,
0xaa3a_93e7,
0x5ac6_35d8,
]);
const GX: U256 = U256([
0xd898_c296,
0xf4a1_3945,
0x2deb_33a0,
0x7703_7d81,
0x63a4_40f2,
0xf8bc_e6e5,
0xe12c_4247,
0x6b17_d1f2,
]);
const GY: U256 = U256([
0x37bf_51f5,
0xcbb6_4068,
0x6b31_5ece,
0x2bce_3357,
0x7c0f_9e16,
0x8ee7_eb4a,
0xfe1a_7f9b,
0x4fe3_42e2,
]);
#[cfg(feature = "std")]
static BASEPOINT_TABLE: OnceLock<[AffinePoint; 16]> = OnceLock::new();
#[cfg(all(not(feature = "std"), target_has_atomic = "8"))]
static BASEPOINT_TABLE: Once<[AffinePoint; 16]> = Once::new();
impl U256 {
fn from_be_bytes(bytes: &[u8; 32]) -> Self {
let mut limbs = [0_u32; 8];
for (i, chunk) in bytes.chunks_exact(4).rev().enumerate() {
limbs[i] = u32::from_be_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
}
Self(limbs)
}
fn to_be_bytes(self) -> [u8; 32] {
let mut out = [0_u8; 32];
for (chunk, limb) in out.chunks_exact_mut(4).zip(self.0.iter().rev()) {
chunk.copy_from_slice(&limb.to_be_bytes());
}
out
}
fn cmp(&self, other: &Self) -> Ordering {
for idx in (0..8).rev() {
if self.0[idx] != other.0[idx] {
return self.0[idx].cmp(&other.0[idx]);
}
}
Ordering::Equal
}
fn is_zero(&self) -> bool {
self.0.iter().all(|&limb| limb == 0)
}
fn is_even(&self) -> bool {
(self.0[0] & 1) == 0
}
fn bit_len(&self) -> usize {
for idx in (0..8).rev() {
let limb = self.0[idx];
if limb != 0 {
return idx * 32 + (32 - limb.leading_zeros() as usize);
}
}
0
}
fn nibble_le(&self, nibble_idx: usize) -> u8 {
let bit_idx = nibble_idx * 4;
let limb_idx = bit_idx / 32;
let shift = bit_idx % 32;
let low = u64::from(*self.0.get(limb_idx).unwrap_or(&0));
let high = u64::from(*self.0.get(limb_idx + 1).unwrap_or(&0));
(((low >> shift) | (high << (32 - shift))) & 0x0f) as u8
}
fn shr1_assign(&mut self) {
let mut carry = 0_u32;
for idx in (0..8).rev() {
let next_carry = self.0[idx] & 1;
self.0[idx] = (self.0[idx] >> 1) | (carry << 31);
carry = next_carry;
}
}
fn shr1_with_high_bit_assign(&mut self, high_bit: u32) {
let mut carry = high_bit & 1;
for idx in (0..8).rev() {
let next_carry = self.0[idx] & 1;
self.0[idx] = (self.0[idx] >> 1) | (carry << 31);
carry = next_carry;
}
}
fn add_raw(self, other: Self) -> (Self, u32) {
let mut out = [0_u32; 8];
let mut carry = 0_u64;
for (idx, dst) in out.iter_mut().enumerate() {
let sum = u64::from(self.0[idx]) + u64::from(other.0[idx]) + carry;
*dst = sum as u32;
carry = sum >> 32;
}
(Self(out), carry as u32)
}
fn sub_raw(self, other: Self) -> (Self, u32) {
let mut out = [0_u32; 8];
let mut borrow = 0_u64;
for (idx, dst) in out.iter_mut().enumerate() {
let lhs = u64::from(self.0[idx]);
let rhs = u64::from(other.0[idx]) + borrow;
if lhs < rhs {
*dst = (lhs + (1_u64 << 32) - rhs) as u32;
borrow = 1;
} else {
*dst = (lhs - rhs) as u32;
borrow = 0;
}
}
(Self(out), borrow as u32)
}
#[allow(dead_code)]
fn add_assign_raw(&mut self, other: Self) {
let (value, _) = self.add_raw(other);
*self = value;
}
fn sub_assign_raw(&mut self, other: Self) {
let (value, _) = self.sub_raw(other);
*self = value;
}
fn add_mod(self, other: Self, modulus: Self) -> Self {
let (mut sum, carry) = self.add_raw(other);
if carry != 0 || sum.cmp(&modulus) != Ordering::Less {
sum = sum.sub_raw(modulus).0;
}
sum
}
fn sub_mod(self, other: Self, modulus: Self) -> Self {
let (diff, borrow) = self.sub_raw(other);
if borrow == 0 {
diff
} else {
diff.add_raw(modulus).0
}
}
fn modulo_small(mut self, modulus: Self) -> Self {
while self.cmp(&modulus) != Ordering::Less {
self = self.sub_raw(modulus).0;
}
self
}
}
impl AffinePoint {
fn infinity() -> Self {
Self {
x: ZERO,
y: ZERO,
infinity: true,
}
}
}
impl JacobianPoint {
fn infinity() -> Self {
Self {
x: ZERO,
y: ZERO,
z: ZERO,
infinity: true,
}
}
fn from_affine(point: AffinePoint) -> Self {
if point.infinity {
Self::infinity()
} else {
Self {
x: point.x,
y: point.y,
z: ONE,
infinity: false,
}
}
}
fn to_affine(self) -> Result<AffinePoint> {
if self.infinity || self.z.is_zero() {
return Ok(AffinePoint::infinity());
}
let z_inv = mod_inv(self.z, P)?;
let z_inv2 = mod_mul(z_inv, z_inv, P);
let z_inv3 = mod_mul(z_inv2, z_inv, P);
Ok(AffinePoint {
x: mod_mul(self.x, z_inv2, P),
y: mod_mul(self.y, z_inv3, P),
infinity: false,
})
}
}
pub(crate) fn scalar_mul_basepoint_bytes(scalar: &[u8; 32]) -> Result<([u8; 32], [u8; 32])> {
let point = scalar_mul_basepoint(U256::from_be_bytes(scalar)).to_affine()?;
if point.infinity {
return Err(Error::CryptoFailure(
"p256 public key derivation produced infinity",
));
}
Ok((point.x.to_be_bytes(), point.y.to_be_bytes()))
}
pub(crate) fn ecdh_shared_secret_bytes(
private_scalar: &[u8; 32],
public_x: &[u8; 32],
public_y: &[u8; 32],
) -> Result<[u8; 32]> {
let public = decode_affine(public_x, public_y)?;
let shared = scalar_mul(U256::from_be_bytes(private_scalar), public).to_affine()?;
if shared.infinity {
return Err(Error::CryptoFailure("p256 shared point is at infinity"));
}
let out = shared.x.to_be_bytes();
if out.iter().all(|&b| b == 0) {
return Err(Error::CryptoFailure("p256 shared secret is all-zero"));
}
Ok(out)
}
pub(crate) fn ecdsa_sign_digest_bytes(
private_scalar: &[u8; 32],
digest: &[u8; 32],
) -> Result<([u8; 32], [u8; 32])> {
let d = U256::from_be_bytes(private_scalar);
let e = U256::from_be_bytes(digest).modulo_small(N);
let mut counter = 0_u32;
loop {
let k = derive_nonce(private_scalar, digest, counter).modulo_small(N);
counter = counter.wrapping_add(1);
if let Some(sig) = ecdsa_sign_core(d, e, k)? {
return Ok(sig);
}
if counter == 0 {
return Err(Error::CryptoFailure(
"p256 ecdsa nonce derivation exhausted",
));
}
}
}
pub(crate) fn ecdsa_sign_digest_with_nonce_bytes(
private_scalar: &[u8; 32],
digest: &[u8; 32],
nonce_candidate: &[u8; 32],
) -> Result<Option<([u8; 32], [u8; 32])>> {
let d = U256::from_be_bytes(private_scalar);
let e = U256::from_be_bytes(digest).modulo_small(N);
let k = U256::from_be_bytes(nonce_candidate).modulo_small(N);
ecdsa_sign_core(d, e, k)
}
pub(crate) fn ecdsa_verify_digest_bytes(
public_x: &[u8; 32],
public_y: &[u8; 32],
digest: &[u8; 32],
r: &[u8; 32],
s: &[u8; 32],
) -> Result<()> {
let public = decode_affine(public_x, public_y)?;
let r = U256::from_be_bytes(r);
let s = U256::from_be_bytes(s);
if r.is_zero() || s.is_zero() || r.cmp(&N) != Ordering::Less || s.cmp(&N) != Ordering::Less {
return Err(Error::CryptoFailure(
"p256 ecdsa signature scalars out of range",
));
}
let e = U256::from_be_bytes(digest).modulo_small(N);
let w = mod_inv(s, N)?;
let u1 = mod_mul(e, w, N);
let u2 = mod_mul(r, w, N);
let point = double_scalar_mul_basepoint(u1, u2, public).to_affine()?;
if point.infinity {
return Err(Error::CryptoFailure(
"p256 ecdsa verification produced point at infinity",
));
}
let v = point.x.modulo_small(N);
if v == r {
Ok(())
} else {
Err(Error::CryptoFailure("p256 ecdsa verification failed"))
}
}
pub(crate) fn validate_public_key_bytes(x: &[u8; 32], y: &[u8; 32]) -> bool {
decode_affine(x, y).is_ok()
}
fn decode_affine(x: &[u8; 32], y: &[u8; 32]) -> Result<AffinePoint> {
let point = AffinePoint {
x: U256::from_be_bytes(x),
y: U256::from_be_bytes(y),
infinity: false,
};
if point.x.cmp(&P) != Ordering::Less || point.y.cmp(&P) != Ordering::Less {
return Err(Error::CryptoFailure(
"p256 public point coordinates out of field range",
));
}
if !is_point_on_curve(point) {
return Err(Error::CryptoFailure("p256 public point is not on curve"));
}
Ok(point)
}
fn ecdsa_sign_core(d: U256, e: U256, k: U256) -> Result<Option<([u8; 32], [u8; 32])>> {
if k.is_zero() {
return Ok(None);
}
let rp = scalar_mul_basepoint(k).to_affine()?;
if rp.infinity {
return Ok(None);
}
let r = rp.x.modulo_small(N);
if r.is_zero() {
return Ok(None);
}
let rd = mod_mul(r, d, N);
let e_plus_rd = e.add_mod(rd, N);
let k_inv = mod_inv(k, N)?;
let s = mod_mul(k_inv, e_plus_rd, N);
if s.is_zero() {
return Ok(None);
}
Ok(Some((r.to_be_bytes(), s.to_be_bytes())))
}
fn derive_nonce(private_scalar: &[u8; 32], digest: &[u8; 32], counter: u32) -> U256 {
let mut seed = [0_u8; 68];
seed[..32].copy_from_slice(private_scalar);
seed[32..64].copy_from_slice(digest);
seed[64..].copy_from_slice(&counter.to_be_bytes());
U256::from_be_bytes(&noxtls_sha256(&seed))
}
#[cfg(feature = "std")]
fn basepoint_table() -> [AffinePoint; 16] {
*BASEPOINT_TABLE.get_or_init(|| {
build_affine_window_table(AffinePoint {
x: GX,
y: GY,
infinity: false,
})
})
}
#[cfg(all(not(feature = "std"), target_has_atomic = "8"))]
fn basepoint_table() -> [AffinePoint; 16] {
*BASEPOINT_TABLE.call_once(|| {
build_affine_window_table(AffinePoint {
x: GX,
y: GY,
infinity: false,
})
})
}
#[cfg(all(not(feature = "std"), not(target_has_atomic = "8")))]
fn basepoint_table() -> [AffinePoint; 16] {
build_affine_window_table(AffinePoint {
x: GX,
y: GY,
infinity: false,
})
}
fn scalar_mul_basepoint(scalar: U256) -> JacobianPoint {
let table = basepoint_table();
scalar_mul_basepoint_from_table(scalar, &table)
}
fn scalar_mul(scalar: U256, point: AffinePoint) -> JacobianPoint {
if point.infinity {
return JacobianPoint::infinity();
}
let table = build_window_table(JacobianPoint::from_affine(point));
scalar_mul_from_table(scalar, &table)
}
fn double_scalar_mul_basepoint(
base_scalar: U256,
scalar: U256,
point: AffinePoint,
) -> JacobianPoint {
if point.infinity {
return scalar_mul_basepoint(base_scalar);
}
let base_table = basepoint_table();
let point_table = build_window_table(JacobianPoint::from_affine(point));
let nibble_count = base_scalar.bit_len().max(scalar.bit_len()).div_ceil(4);
let mut acc = JacobianPoint::infinity();
for nibble_idx in (0..nibble_count).rev() {
for _ in 0..4 {
acc = jacobian_double(acc);
}
let base_nibble = usize::from(base_scalar.nibble_le(nibble_idx));
if base_nibble != 0 {
acc = jacobian_add_mixed(acc, base_table[base_nibble]);
}
let point_nibble = usize::from(scalar.nibble_le(nibble_idx));
if point_nibble != 0 {
acc = jacobian_add(acc, point_table[point_nibble]);
}
}
acc
}
fn scalar_mul_basepoint_from_table(scalar: U256, table: &[AffinePoint; 16]) -> JacobianPoint {
let mut acc = JacobianPoint::infinity();
let nibble_count = scalar.bit_len().div_ceil(4);
for nibble_idx in (0..nibble_count).rev() {
for _ in 0..4 {
acc = jacobian_double(acc);
}
let nibble = usize::from(scalar.nibble_le(nibble_idx));
if nibble != 0 {
acc = jacobian_add_mixed(acc, table[nibble]);
}
}
acc
}
fn scalar_mul_from_table(scalar: U256, table: &[JacobianPoint; 16]) -> JacobianPoint {
let mut acc = JacobianPoint::infinity();
let nibble_count = scalar.bit_len().div_ceil(4);
for nibble_idx in (0..nibble_count).rev() {
for _ in 0..4 {
acc = jacobian_double(acc);
}
let nibble = usize::from(scalar.nibble_le(nibble_idx));
if nibble != 0 {
acc = jacobian_add(acc, table[nibble]);
}
}
acc
}
fn build_window_table(base: JacobianPoint) -> [JacobianPoint; 16] {
let mut table = [JacobianPoint::infinity(); 16];
table[1] = base;
for idx in 2..16 {
table[idx] = jacobian_add(table[idx - 1], base);
}
table
}
fn build_affine_window_table(base: AffinePoint) -> [AffinePoint; 16] {
let mut table = [AffinePoint::infinity(); 16];
if base.infinity {
return table;
}
table[1] = base;
let mut acc = JacobianPoint::from_affine(base);
for item in table.iter_mut().skip(2) {
acc = jacobian_add_mixed(acc, base);
*item = acc
.to_affine()
.expect("p256 basepoint affine table entry conversion");
}
table
}
fn is_point_on_curve(point: AffinePoint) -> bool {
if point.infinity {
return false;
}
let y_sq = mod_mul(point.y, point.y, P);
let x_sq = mod_mul(point.x, point.x, P);
let x_cu = mod_mul(x_sq, point.x, P);
let ax = mod_mul(A, point.x, P);
let rhs = x_cu.add_mod(ax, P).add_mod(B, P);
y_sq == rhs
}
fn jacobian_double(a: JacobianPoint) -> JacobianPoint {
if a.infinity || a.y.is_zero() {
return JacobianPoint::infinity();
}
let yy = mod_mul(a.y, a.y, P);
let yyyy = mod_mul(yy, yy, P);
let x_yy = mod_mul(a.x, yy, P);
let s = x_yy.add_mod(x_yy, P).add_mod(x_yy, P).add_mod(x_yy, P);
let zz = mod_mul(a.z, a.z, P);
let x_minus_zz = a.x.sub_mod(zz, P);
let x_plus_zz = a.x.add_mod(zz, P);
let m_term = mod_mul(x_minus_zz, x_plus_zz, P);
let m = m_term.add_mod(m_term, P).add_mod(m_term, P);
let m2 = mod_mul(m, m, P);
let two_s = s.add_mod(s, P);
let x3 = m2.sub_mod(two_s, P);
let s_minus_x3 = s.sub_mod(x3, P);
let m_s_minus_x3 = mod_mul(m, s_minus_x3, P);
let two_yyyy = yyyy.add_mod(yyyy, P);
let four_yyyy = two_yyyy.add_mod(two_yyyy, P);
let eight_yyyy = four_yyyy.add_mod(four_yyyy, P);
let y3 = m_s_minus_x3.sub_mod(eight_yyyy, P);
let yz = mod_mul(a.y, a.z, P);
let z3 = yz.add_mod(yz, P);
JacobianPoint {
x: x3,
y: y3,
z: z3,
infinity: false,
}
}
fn jacobian_add(a: JacobianPoint, b: JacobianPoint) -> JacobianPoint {
if a.infinity {
return b;
}
if b.infinity {
return a;
}
let z1z1 = mod_mul(a.z, a.z, P);
let z2z2 = mod_mul(b.z, b.z, P);
let u1 = mod_mul(a.x, z2z2, P);
let u2 = mod_mul(b.x, z1z1, P);
let z1_cubed = mod_mul(z1z1, a.z, P);
let z2_cubed = mod_mul(z2z2, b.z, P);
let s1 = mod_mul(a.y, z2_cubed, P);
let s2 = mod_mul(b.y, z1_cubed, P);
if u1 == u2 {
if s1 != s2 {
return JacobianPoint::infinity();
}
return jacobian_double(a);
}
let h = u2.sub_mod(u1, P);
let two_h = h.add_mod(h, P);
let i = mod_mul(two_h, two_h, P);
let j = mod_mul(h, i, P);
let s2_minus_s1 = s2.sub_mod(s1, P);
let r = s2_minus_s1.add_mod(s2_minus_s1, P);
let v = mod_mul(u1, i, P);
let r2 = mod_mul(r, r, P);
let two_v = v.add_mod(v, P);
let x3 = r2.sub_mod(j, P).sub_mod(two_v, P);
let v_minus_x3 = v.sub_mod(x3, P);
let r_v_minus_x3 = mod_mul(r, v_minus_x3, P);
let two_s1 = s1.add_mod(s1, P);
let two_s1_j = mod_mul(two_s1, j, P);
let y3 = r_v_minus_x3.sub_mod(two_s1_j, P);
let z1_plus_z2 = a.z.add_mod(b.z, P);
let z1_plus_z2_sq = mod_mul(z1_plus_z2, z1_plus_z2, P);
let z_sum = z1_plus_z2_sq.sub_mod(z1z1, P).sub_mod(z2z2, P);
let z3 = mod_mul(z_sum, h, P);
JacobianPoint {
x: x3,
y: y3,
z: z3,
infinity: false,
}
}
fn jacobian_add_mixed(a: JacobianPoint, b: AffinePoint) -> JacobianPoint {
if a.infinity {
return JacobianPoint::from_affine(b);
}
if b.infinity {
return a;
}
let z1z1 = mod_mul(a.z, a.z, P);
let u2 = mod_mul(b.x, z1z1, P);
let z1_cubed = mod_mul(z1z1, a.z, P);
let s2 = mod_mul(b.y, z1_cubed, P);
if a.x == u2 {
if a.y != s2 {
return JacobianPoint::infinity();
}
return jacobian_double(a);
}
let h = u2.sub_mod(a.x, P);
let hh = mod_mul(h, h, P);
let i = hh.add_mod(hh, P).add_mod(hh, P).add_mod(hh, P);
let j = mod_mul(h, i, P);
let s2_minus_y1 = s2.sub_mod(a.y, P);
let r = s2_minus_y1.add_mod(s2_minus_y1, P);
let v = mod_mul(a.x, i, P);
let r2 = mod_mul(r, r, P);
let two_v = v.add_mod(v, P);
let x3 = r2.sub_mod(j, P).sub_mod(two_v, P);
let v_minus_x3 = v.sub_mod(x3, P);
let r_v_minus_x3 = mod_mul(r, v_minus_x3, P);
let two_y1 = a.y.add_mod(a.y, P);
let two_y1_j = mod_mul(two_y1, j, P);
let y3 = r_v_minus_x3.sub_mod(two_y1_j, P);
let z1_plus_h = a.z.add_mod(h, P);
let z1_plus_h_sq = mod_mul(z1_plus_h, z1_plus_h, P);
let z3 = z1_plus_h_sq.sub_mod(z1z1, P).sub_mod(hh, P);
JacobianPoint {
x: x3,
y: y3,
z: z3,
infinity: false,
}
}
fn mod_mul(a: U256, b: U256, modulus: U256) -> U256 {
let product = mul_wide(a, b);
if modulus == P {
mod_512_by_p256(product)
} else {
mod_512_by_256(product, modulus)
}
}
fn mod_inv(a: U256, modulus: U256) -> Result<U256> {
if a.is_zero() {
return Err(Error::CryptoFailure(
"p256 modular inverse of zero is undefined",
));
}
let mut u = a.modulo_small(modulus);
let mut v = modulus;
let mut x1 = ONE;
let mut x2 = ZERO;
while u != ONE && v != ONE {
while u.is_even() {
u.shr1_assign();
mod_inv_halve_assign(&mut x1, modulus);
}
while v.is_even() {
v.shr1_assign();
mod_inv_halve_assign(&mut x2, modulus);
}
if u.cmp(&v) != Ordering::Less {
u.sub_assign_raw(v);
mod_sub_assign(&mut x1, x2, modulus);
} else {
v.sub_assign_raw(u);
mod_sub_assign(&mut x2, x1, modulus);
}
}
if u == ONE {
Ok(x1.modulo_small(modulus))
} else if v == ONE {
Ok(x2.modulo_small(modulus))
} else {
Err(Error::CryptoFailure("p256 modular inverse does not exist"))
}
}
fn mod_inv_halve_assign(value: &mut U256, modulus: U256) {
if value.is_even() {
value.shr1_assign();
} else {
let (sum, carry) = value.add_raw(modulus);
*value = sum;
value.shr1_with_high_bit_assign(carry);
}
}
fn mod_sub_assign(value: &mut U256, other: U256, modulus: U256) {
if value.cmp(&other) != Ordering::Less {
value.sub_assign_raw(other);
} else {
let (sum, carry) = value.add_raw(modulus);
let (diff, borrow) = sum.sub_raw(other);
debug_assert_eq!(borrow, carry);
*value = diff;
}
}
fn mul_wide(a: U256, b: U256) -> [u32; 16] {
let mut out = [0_u32; 16];
for (i, &a_limb) in a.0.iter().enumerate() {
let mut carry = 0_u64;
for (j, &b_limb) in b.0.iter().enumerate() {
let idx = i + j;
let cur = u64::from(out[idx]) + u64::from(a_limb) * u64::from(b_limb) + carry;
out[idx] = cur as u32;
carry = cur >> 32;
}
let mut idx = i + 8;
while carry != 0 && idx < out.len() {
let cur = u64::from(out[idx]) + carry;
out[idx] = cur as u32;
carry = cur >> 32;
idx += 1;
}
}
out
}
fn mod_512_by_256(dividend: [u32; 16], divisor: U256) -> U256 {
let mut v = divisor.0;
let mut u = [0_u32; 17];
u[..16].copy_from_slice(÷nd);
let norm_shift = v[7].leading_zeros() as usize;
if norm_shift > 0 {
limbs_shl_bits(&mut v, norm_shift);
limbs_shl_bits(&mut u, norm_shift);
}
for j in (0..=8).rev() {
let num = (u64::from(u[j + 8]) << 32) | u64::from(u[j + 7]);
let den = u64::from(v[7]);
let mut qhat = (num / den).min(u64::from(u32::MAX));
let mut rhat = num - (qhat * den);
loop {
let lhs = qhat * u64::from(v[6]);
if (rhat >> 32) != 0 {
break;
}
let rhs = (rhat << 32) + u64::from(u[j + 6]);
if lhs <= rhs {
break;
}
qhat -= 1;
rhat += den;
}
if qhat != 0 {
let borrow = limb_mul_sub(&mut u, j, qhat as u32, &v);
if borrow {
let carry_out = limb_add_at(&mut u, j, &v);
if carry_out != 0 && (j + 9) <= 16 {
u[j + 9] = u[j + 9].wrapping_add(carry_out);
}
}
}
}
while u[8] != 0 || ge_limbs_prefix(&u, &divisor.0, 8) {
if sub_limbs_borrow_prefix(&mut u, &divisor.0, 8) {
if u[8] != 0 {
u[8] -= 1;
} else {
break;
}
}
}
if norm_shift > 0 {
limbs_shr_bits(&mut u[..8], norm_shift);
}
let mut rem = [0_u32; 8];
rem.copy_from_slice(&u[..8]);
if ge_limbs_prefix(&rem, &divisor.0, 8) {
sub_limbs_prefix(&mut rem, &divisor.0, 8);
}
U256(rem)
}
fn mod_512_by_p256(dividend: [u32; 16]) -> U256 {
let mut acc = [0_i128; 16];
for (idx, &limb) in dividend.iter().enumerate() {
acc[idx] = i128::from(limb);
}
for idx in (8..16).rev() {
let high = acc[idx];
if high == 0 {
continue;
}
acc[idx] = 0;
acc[idx - 8] += high;
acc[idx - 1] += high;
acc[idx - 2] -= high;
acc[idx - 5] -= high;
}
let mut carry = normalize_signed_limbs(&mut acc[..8]);
while carry != 0 {
acc[0] += carry;
acc[7] += carry;
acc[6] -= carry;
acc[3] -= carry;
carry = normalize_signed_limbs(&mut acc[..8]);
}
let mut reduced = [0_u32; 8];
for (dst, &src) in reduced.iter_mut().zip(acc[..8].iter()) {
*dst = src as u32;
}
let mut out = U256(reduced);
while out.cmp(&P) != Ordering::Less {
out = out.sub_raw(P).0;
}
out
}
fn normalize_signed_limbs(limbs: &mut [i128]) -> i128 {
let radix = 1_i128 << 32;
let mut carry = 0_i128;
for limb in limbs {
let value = *limb + carry;
*limb = value.rem_euclid(radix);
carry = value.div_euclid(radix);
}
carry
}
fn limbs_shl_bits(limbs: &mut [u32], k: usize) {
if k == 0 || k >= 32 {
return;
}
let mut carry = 0_u32;
for limb in limbs {
let v = *limb;
*limb = (v << k) | carry;
carry = v >> (32 - k);
}
}
fn limbs_shr_bits(limbs: &mut [u32], k: usize) {
if k == 0 || k >= 32 {
return;
}
let mut carry = 0_u32;
for idx in (0..limbs.len()).rev() {
let v = limbs[idx];
limbs[idx] = (v >> k) | carry;
carry = v << (32 - k);
}
}
fn ge_limbs_prefix(a: &[u32], b: &[u32; 8], len: usize) -> bool {
for idx in (0..len).rev() {
if a[idx] != b[idx] {
return a[idx] > b[idx];
}
}
true
}
fn sub_limbs_prefix(a: &mut [u32], b: &[u32; 8], len: usize) {
let mut borrow = 0_u64;
for idx in 0..len {
let av = u64::from(a[idx]);
let bv = u64::from(b[idx]) + borrow;
if av < bv {
a[idx] = (av + (1_u64 << 32) - bv) as u32;
borrow = 1;
} else {
a[idx] = (av - bv) as u32;
borrow = 0;
}
}
}
fn sub_limbs_borrow_prefix(a: &mut [u32], b: &[u32; 8], len: usize) -> bool {
let mut borrow = 0_u64;
for idx in 0..len {
let av = u64::from(a[idx]);
let bv = u64::from(b[idx]) + borrow;
if av < bv {
a[idx] = (av + (1_u64 << 32) - bv) as u32;
borrow = 1;
} else {
a[idx] = (av - bv) as u32;
borrow = 0;
}
}
borrow != 0
}
fn limb_mul_sub(u: &mut [u32; 17], offset: usize, qhat: u32, v: &[u32; 8]) -> bool {
let mut borrow = 0_u64;
let mut carry = 0_u64;
for idx in 0..8 {
let prod = u64::from(qhat) * u64::from(v[idx]) + carry;
carry = prod >> 32;
let sub = (prod & 0xFFFF_FFFF) + borrow;
let cur = u64::from(u[offset + idx]);
if cur < sub {
u[offset + idx] = (cur + (1_u64 << 32) - sub) as u32;
borrow = 1;
} else {
u[offset + idx] = (cur - sub) as u32;
borrow = 0;
}
}
let sub_hi = carry + borrow;
let cur = u64::from(u[offset + 8]);
if cur < sub_hi {
u[offset + 8] = (cur + (1_u64 << 32) - sub_hi) as u32;
true
} else {
u[offset + 8] = (cur - sub_hi) as u32;
false
}
}
fn limb_add_at(u: &mut [u32; 17], offset: usize, v: &[u32; 8]) -> u32 {
let mut carry = 0_u64;
for idx in 0..8 {
let cur = u64::from(u[offset + idx]) + u64::from(v[idx]) + carry;
u[offset + idx] = cur as u32;
carry = cur >> 32;
}
let cur = u64::from(u[offset + 8]) + carry;
u[offset + 8] = cur as u32;
(cur >> 32) as u32
}
#[cfg(test)]
mod tests {
use super::*;
const TWO: U256 = U256([2, 0, 0, 0, 0, 0, 0, 0]);
const THREE: U256 = U256([3, 0, 0, 0, 0, 0, 0, 0]);
fn basepoint() -> AffinePoint {
AffinePoint {
x: GX,
y: GY,
infinity: false,
}
}
#[test]
fn p256_mod_inv_halve_assign_handles_overflowing_odd_values() {
let original = P.sub_raw(TWO).0;
assert!(!original.is_even());
let mut halved = original;
mod_inv_halve_assign(&mut halved, P);
assert_eq!(halved.add_mod(halved, P), original);
}
#[test]
fn p256_mod_sub_assign_handles_overflowing_value_plus_modulus() {
let original = P.sub_raw(THREE).0;
let other = P.sub_raw(ONE).0;
assert_eq!(original.cmp(&other), Ordering::Less);
let mut reduced = original;
mod_sub_assign(&mut reduced, other, P);
assert_eq!(reduced.add_mod(other, P), original);
}
#[test]
fn p256_jacobian_double_matches_add_for_basepoint() {
let base = JacobianPoint::from_affine(basepoint());
let doubled = jacobian_double(base).to_affine().expect("double");
let added = jacobian_add(base, base).to_affine().expect("add");
assert_eq!(doubled, added);
assert!(is_point_on_curve(doubled));
}
#[test]
fn p256_jacobian_add_mixed_matches_full_add_for_basepoint() {
let base_affine = basepoint();
let base = JacobianPoint::from_affine(base_affine);
let mixed = jacobian_add_mixed(base, base_affine)
.to_affine()
.expect("mixed");
let full = jacobian_add(base, JacobianPoint::from_affine(base_affine))
.to_affine()
.expect("full");
assert_eq!(mixed, full);
assert!(is_point_on_curve(mixed));
}
#[test]
fn p256_specialized_field_reduction_matches_generic_reducer() {
let samples = [
(GX, GY),
(GY, GX),
(P.sub_raw(ONE).0, P.sub_raw(TWO).0),
(
U256([
0x89ab_cdef,
0x0123_4567,
0xfedc_ba98,
0x7654_3210,
1,
2,
3,
4,
]),
GX,
),
];
for (lhs, rhs) in samples {
let product = mul_wide(lhs, rhs);
let specialized = mod_512_by_p256(product);
let generic = mod_512_by_256(product, P);
assert_eq!(specialized, generic);
}
}
#[test]
fn p256_double_scalar_mul_matches_separate_multiplications() {
let public = basepoint();
let u1 = U256([
0x89ab_cdef,
0x0123_4567,
0xfedc_ba98,
0x7654_3210,
1,
2,
3,
4,
])
.modulo_small(N);
let u2 = U256([
0x1020_3040,
0x5060_7080,
0x90ab_cdef,
0x1357_9bdf,
5,
6,
7,
8,
])
.modulo_small(N);
let joint = double_scalar_mul_basepoint(u1, u2, public)
.to_affine()
.expect("joint");
let separate = jacobian_add(scalar_mul_basepoint(u1), scalar_mul(u2, public))
.to_affine()
.expect("separate");
assert_eq!(joint, separate);
assert!(is_point_on_curve(joint));
}
#[test]
fn p256_basepoint_scalar_mul_matches_variable_scalar_mul() {
let scalar = U256([
0x89ab_cdef,
0x0123_4567,
0xfedc_ba98,
0x7654_3210,
1,
2,
3,
4,
])
.modulo_small(N);
let fixed = scalar_mul_basepoint(scalar).to_affine().expect("fixed");
let variable = scalar_mul(scalar, basepoint())
.to_affine()
.expect("variable");
assert_eq!(fixed, variable);
assert!(is_point_on_curve(fixed));
}
}