krypteia-arcana 0.1.0

//! Elliptic curve point operations on short Weierstrass curves y^2 = x^3 + ax + b.
//!
//! Uses Jacobian projective coordinates (X, Y, Z) where the affine point is
//! (X/Z^2, Y/Z^3). The point at infinity is represented by Z = 0.
//!
//! Scalar multiplication uses a Montgomery ladder for constant-time execution.

use super::field::*;

/// A point on a short Weierstrass curve in Jacobian projective
/// coordinates `(X, Y, Z)`.
///
/// The corresponding affine point is `(X / Z^2, Y / Z^3)`. The point
/// at infinity is represented by `Z == 0`. Working in projective
/// coordinates lets the point doubling and addition formulas avoid
/// the field inversion that the affine forms would need on every
/// operation; only [`Self::to_affine`] performs an inversion (once,
/// at the boundary back to the user-visible representation).
#[derive(Clone, Copy, Debug)]
pub struct JacobianPoint<const LIMBS: usize> {
    /// Projective X coordinate.
    pub x: FieldElement<LIMBS>,
    /// Projective Y coordinate.
    pub y: FieldElement<LIMBS>,
    /// Projective Z coordinate. `Z == 0` encodes the point at infinity.
    pub z: FieldElement<LIMBS>,
}

impl<const LIMBS: usize> JacobianPoint<LIMBS> {
    /// The point at infinity (identity element).
    pub fn infinity() -> Self {
        Self {
            x: FieldElement::one(),
            y: FieldElement::one(),
            z: FieldElement::ZERO,
        }
    }

    /// Create a point from affine coordinates (x, y). Z = 1.
    pub fn from_affine(x: FieldElement<LIMBS>, y: FieldElement<LIMBS>) -> Self {
        Self {
            x,
            y,
            z: FieldElement::one(),
        }
    }

    /// Returns `true` if this point is the point at infinity (Z == 0).
    pub fn is_infinity(&self) -> bool {
        self.z.is_zero()
    }

    /// Convert to affine coordinates (x, y). Returns None for point at infinity.
    pub fn to_affine(&self, p: &[u64; LIMBS]) -> Option<(FieldElement<LIMBS>, FieldElement<LIMBS>)> {
        if self.z.is_zero() {
            return None;
        }
        let z_inv = field_inv(&self.z, p);
        let z_inv2 = field_sqr(&z_inv, p);
        let z_inv3 = field_mul(&z_inv2, &z_inv, p);
        let x = field_mul(&self.x, &z_inv2, p);
        let y = field_mul(&self.y, &z_inv3, p);
        Some((x, y))
    }
}

/// Parameters for a short Weierstrass curve.
pub struct CurveParams<const LIMBS: usize> {
    /// Field prime p.
    pub p: [u64; LIMBS],
    /// Coefficient a (for P-256 and P-384, a = -3 mod p).
    pub a: FieldElement<LIMBS>,
    /// Coefficient b.
    pub b: FieldElement<LIMBS>,
    /// X coordinate of the generator point `G`.
    pub gx: FieldElement<LIMBS>,
    /// Y coordinate of the generator point `G`.
    pub gy: FieldElement<LIMBS>,
    /// Order n.
    pub n: [u64; LIMBS],
    /// Bit length of `n` (aka `qlen` in RFC 6979).
    ///
    /// For all curves we currently ship, `qlen_bits` is a multiple of 8
    /// **except for secp521r1** where it is 521 (not a multiple of 8).
    /// RFC 6979 is careful to distinguish `qlen` from `rlen = 8*ceil(qlen/8)`
    /// (aka `rlen_bytes = (qlen_bits + 7) / 8`), and for P-521 those two
    /// values disagree by 7 bits. All RFC 6979 byte-length decisions
    /// (int2octets, bits2octets, the HMAC-T accumulation loop) must use
    /// `rlen_bytes`, *not* `LIMBS * 8`.
    pub qlen_bits: usize,
    /// **SEC1 §2.3.5 field element octet length** = `ceil(log2(p) / 8)`.
    ///
    /// This is the **external** width of a field element for all
    /// serialization purposes (uncompressed / compressed SEC1 public
    /// keys, raw r/s in a signature, ECDH shared-secret output). It is
    /// distinct from the **internal** storage width `LIMBS * 8`, which
    /// is an implementation detail driven by the 64-bit limb alignment.
    ///
    /// | Curve                 | LIMBS*8 (internal) | felem_bytes (external) |
    /// |-----------------------|--------------------|-------------------------|
    /// | P-256                 | 32                 | 32                      |
    /// | P-384                 | 48                 | 48                      |
    /// | secp256k1             | 32                 | 32                      |
    /// | brainpoolP256r1       | 32                 | 32                      |
    /// | brainpoolP384r1       | 48                 | 48                      |
    /// | brainpoolP512r1       | 64                 | 64                      |
    /// | **secp521r1 (P-521)** | **72**             | **66**                  |
    ///
    /// P-521 is the only curve where the two values disagree (576-bit
    /// storage vs 521-bit field → 66 bytes externally). The 6 leading
    /// bytes of `to_bytes_be()` on a P-521 field element are always
    /// zero and must be stripped at the serialization boundary;
    /// conversely, parsers must left-pad a 66-byte external value into
    /// the 72-byte internal buffer before building a `FieldElement<9>`.
    /// The 6 byte-aligned curves treat this as a no-op because
    /// `felem_bytes == LIMBS * 8`.
    pub felem_bytes: usize,
}

// ============================================================================
// Curve parameter helpers
// ============================================================================

/// Decode a hex string into a `FieldElement<LIMBS>`. The hex is interpreted as
/// big-endian and must contain an even number of hex digits.
pub fn hex_to_fe<const LIMBS: usize>(hex: &str) -> FieldElement<LIMBS> {
    let bytes: Vec<u8> = (0..hex.len())
        .step_by(2)
        .map(|i| u8::from_str_radix(&hex[i..i + 2], 16).unwrap())
        .collect();
    FieldElement::from_bytes_be(&bytes)
}

/// Decode a hex string into a `[u64; LIMBS]` (used for `p` and `n`).
/// Big-endian hex; pads with leading zeros if shorter than LIMBS*8 bytes.
pub fn hex_to_limbs<const LIMBS: usize>(hex: &str) -> [u64; LIMBS] {
    hex_to_fe::<LIMBS>(hex).limbs
}

// Backwards-compatible aliases used by P-256/P-384 below.
fn hex_to_fe4(hex: &str) -> FieldElement<4> {
    hex_to_fe::<4>(hex)
}
fn hex_to_fe6(hex: &str) -> FieldElement<6> {
    hex_to_fe::<6>(hex)
}

// ============================================================================
// P-256 curve parameters
// ============================================================================

/// NIST P-256 / secp256r1 curve parameters (FIPS 186-4 §D.1.2.3).
pub fn p256_params() -> CurveParams<4> {
    CurveParams {
        p: P256_P,
        a: hex_to_fe4("FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFC"),
        b: hex_to_fe4("5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B"),
        gx: hex_to_fe4("6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296"),
        gy: hex_to_fe4("4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5"),
        n: P256_N,
        qlen_bits: 256,
        felem_bytes: 32,
    }
}

/// NIST P-384 / secp384r1 curve parameters (FIPS 186-4 §D.1.2.4).
pub fn p384_params() -> CurveParams<6> {
    CurveParams {
        p: P384_P,
        a: hex_to_fe6(
            "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFC",
        ),
        b: hex_to_fe6(
            "B3312FA7E23EE7E4988E056BE3F82D19181D9C6EFE8141120314088F5013875AC656398D8A2ED19D2A85C8EDD3EC2AEF",
        ),
        gx: hex_to_fe6(
            "AA87CA22BE8B05378EB1C71EF320AD746E1D3B628BA79B9859F741E082542A385502F25DBF55296C3A545E3872760AB7",
        ),
        gy: hex_to_fe6(
            "3617DE4A96262C6F5D9E98BF9292DC29F8F41DBD289A147CE9DA3113B5F0B8C00A60B1CE1D7E819D7A431D7C90EA0E5F",
        ),
        n: P384_N,
        qlen_bits: 384,
        felem_bytes: 48,
    }
}

// ============================================================================
// secp256k1 (SEC 2 v2.0 §2.4.1)  --  Bitcoin / Ethereum curve
// ============================================================================
//
// y^2 = x^3 + 7  (i.e. a = 0, b = 7)
// p = 2^256 - 2^32 - 977
//   = FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F

/// secp256k1 curve parameters (SEC 2 v2.0 §2.4.1).
///
/// `y^2 = x^3 + 7` over `p = 2^256 - 2^32 - 977`. The Bitcoin /
/// Ethereum signing curve, also widely used in Lightning, Nostr,
/// and most blockchain ecosystems.
pub fn secp256k1_params() -> CurveParams<4> {
    CurveParams {
        p: hex_to_limbs::<4>("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F"),
        a: hex_to_fe::<4>("0000000000000000000000000000000000000000000000000000000000000000"),
        b: hex_to_fe::<4>("0000000000000000000000000000000000000000000000000000000000000007"),
        gx: hex_to_fe::<4>("79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798"),
        gy: hex_to_fe::<4>("483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8"),
        n: hex_to_limbs::<4>("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141"),
        qlen_bits: 256,
        felem_bytes: 32,
    }
}

// ============================================================================
// brainpoolP256r1 (RFC 5639 §3.4)
// ============================================================================

/// brainpoolP256r1 curve parameters (BSI / RFC 5639 §3.4).
pub fn brainpoolp256r1_params() -> CurveParams<4> {
    CurveParams {
        p: hex_to_limbs::<4>("A9FB57DBA1EEA9BC3E660A909D838D726E3BF623D52620282013481D1F6E5377"),
        a: hex_to_fe::<4>("7D5A0975FC2C3057EEF67530417AFFE7FB8055C126DC5C6CE94A4B44F330B5D9"),
        b: hex_to_fe::<4>("26DC5C6CE94A4B44F330B5D9BBD77CBF958416295CF7E1CE6BCCDC18FF8C07B6"),
        gx: hex_to_fe::<4>("8BD2AEB9CB7E57CB2C4B482FFC81B7AFB9DE27E1E3BD23C23A4453BD9ACE3262"),
        gy: hex_to_fe::<4>("547EF835C3DAC4FD97F8461A14611DC9C27745132DED8E545C1D54C72F046997"),
        n: hex_to_limbs::<4>("A9FB57DBA1EEA9BC3E660A909D838D718C397AA3B561A6F7901E0E82974856A7"),
        qlen_bits: 256,
        felem_bytes: 32,
    }
}

// ============================================================================
// brainpoolP384r1 (RFC 5639 §3.6)
// ============================================================================

/// brainpoolP384r1 curve parameters (BSI / RFC 5639 §3.6).
pub fn brainpoolp384r1_params() -> CurveParams<6> {
    CurveParams {
        p: hex_to_limbs::<6>(
            "8CB91E82A3386D280F5D6F7E50E641DF152F7109ED5456B412B1DA197FB71123ACD3A729901D1A71874700133107EC53",
        ),
        a: hex_to_fe::<6>(
            "7BC382C63D8C150C3C72080ACE05AFA0C2BEA28E4FB22787139165EFBA91F90F8AA5814A503AD4EB04A8C7DD22CE2826",
        ),
        b: hex_to_fe::<6>(
            "04A8C7DD22CE28268B39B55416F0447C2FB77DE107DCD2A62E880EA53EEB62D57CB4390295DBC9943AB78696FA504C11",
        ),
        gx: hex_to_fe::<6>(
            "1D1C64F068CF45FFA2A63A81B7C13F6B8847A3E77EF14FE3DB7FCAFE0CBD10E8E826E03436D646AAEF87B2E247D4AF1E",
        ),
        gy: hex_to_fe::<6>(
            "8ABE1D7520F9C2A45CB1EB8E95CFD55262B70B29FEEC5864E19C054FF99129280E4646217791811142820341263C5315",
        ),
        n: hex_to_limbs::<6>(
            "8CB91E82A3386D280F5D6F7E50E641DF152F7109ED5456B31F166E6CAC0425A7CF3AB6AF6B7FC3103B883202E9046565",
        ),
        qlen_bits: 384,
        felem_bytes: 48,
    }
}

// ============================================================================
// brainpoolP512r1 (RFC 5639 §3.7)
// ============================================================================

/// brainpoolP512r1 curve parameters (BSI / RFC 5639 §3.7).
pub fn brainpoolp512r1_params() -> CurveParams<8> {
    CurveParams {
        p: hex_to_limbs::<8>(
            "AADD9DB8DBE9C48B3FD4E6AE33C9FC07CB308DB3B3C9D20ED6639CCA703308717D4D9B009BC66842AECDA12AE6A380E62881FF2F2D82C68528AA6056583A48F3",
        ),
        a: hex_to_fe::<8>(
            "7830A3318B603B89E2327145AC234CC594CBDD8D3DF91610A83441CAEA9863BC2DED5D5AA8253AA10A2EF1C98B9AC8B57F1117A72BF2C7B9E7C1AC4D77FC94CA",
        ),
        b: hex_to_fe::<8>(
            "3DF91610A83441CAEA9863BC2DED5D5AA8253AA10A2EF1C98B9AC8B57F1117A72BF2C7B9E7C1AC4D77FC94CADC083E67984050B75EBAE5DD2809BD638016F723",
        ),
        gx: hex_to_fe::<8>(
            "81AEE4BDD82ED9645A21322E9C4C6A9385ED9F70B5D916C1B43B62EEF4D0098EFF3B1F78E2D0D48D50D1687B93B97D5F7C6D5047406A5E688B352209BCB9F822",
        ),
        gy: hex_to_fe::<8>(
            "7DDE385D566332ECC0EABFA9CF7822FDF209F70024A57B1AA000C55B881F8111B2DCDE494A5F485E5BCA4BD88A2763AED1CA2B2FA8F0540678CD1E0F3AD80892",
        ),
        n: hex_to_limbs::<8>(
            "AADD9DB8DBE9C48B3FD4E6AE33C9FC07CB308DB3B3C9D20ED6639CCA70330870553E5C414CA92619418661197FAC10471DB1D381085DDADDB58796829CA90069",
        ),
        qlen_bits: 512,
        felem_bytes: 64,
    }
}

// ============================================================================
// secp521r1 / NIST P-521  (FIPS 186-4 §D.1.2.5, SEC 2 §2.9.1)
// ============================================================================
//
// y^2 = x^3 - 3*x + b
// p = 2^521 - 1   (a Mersenne prime, so p ≡ 3 (mod 4))
// qlen = 521 bits (NOT a multiple of 8 -- the only curve we ship with
// this property; see CurveParams::qlen_bits for the RFC 6979 implications)
//
// The field element width LIMBS = 9 uses 576 bits of storage, of which
// the top 55 bits are always zero for values in [0, p). Likewise scalars
// mod n have 55 zero leading bits in the 9-limb representation.

/// secp521r1 / NIST P-521 curve parameters (FIPS 186-4 §D.1.2.5).
///
/// `y^2 = x^3 - 3*x + b` over the Mersenne prime `p = 2^521 - 1`.
/// The only curve we ship with `qlen` not a multiple of 8 -- see
/// [`CurveParams::qlen_bits`] and [`CurveParams::felem_bytes`] for
/// the implications on RFC 6979 byte lengths and SEC1 octet
/// encoding respectively.
pub fn secp521r1_params() -> CurveParams<9> {
    // All values verified against NIST SP 800-186 / FIPS 186-5 Appendix C.2.3
    // and cross-checked via Python (on-curve + n*G == infinity).
    CurveParams {
        p: hex_to_limbs::<9>(
            "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF",
        ),
        a: hex_to_fe::<9>(
            "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC",
        ),
        b: hex_to_fe::<9>(
            "0051953EB9618E1C9A1F929A21A0B68540EEA2DA725B99B315F3B8B489918EF109E156193951EC7E937B1652C0BD3BB1BF073573DF883D2C34F1EF451FD46B503F00",
        ),
        gx: hex_to_fe::<9>(
            "00C6858E06B70404E9CD9E3ECB662395B4429C648139053FB521F828AF606B4D3DBAA14B5E77EFE75928FE1DC127A2FFA8DE3348B3C1856A429BF97E7E31C2E5BD66",
        ),
        gy: hex_to_fe::<9>(
            "011839296A789A3BC0045C8A5FB42C7D1BD998F54449579B446817AFBD17273E662C97EE72995EF42640C550B9013FAD0761353C7086A272C24088BE94769FD16650",
        ),
        n: hex_to_limbs::<9>(
            "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFA51868783BF2F966B7FCC0148F709A5D03BB5C9B8899C47AEBB6FB71E91386409",
        ),
        qlen_bits: 521,
        // P-521 is the only curve where felem_bytes (66) < LIMBS*8 (72).
        // External serialization must strip the 6 leading zero bytes on
        // output and the parser must left-pad them back in on input.
        felem_bytes: 66,
    }
}

// ============================================================================
// Point operations
// ============================================================================

/// Check whether the affine point `(x, y)` lies on the short Weierstrass curve
/// `y^2 = x^3 + a*x + b` defined by `params`.
///
/// **Critical for ECDH**: any externally-supplied public key must be validated
/// with this function before being multiplied by a secret scalar. Otherwise an
/// "invalid curve attack" can recover bits of the secret key by feeding crafted
/// off-curve points whose order in the broken group is small.
///
/// Returns `true` iff the affine equation holds modulo `p`.
pub fn is_on_curve<const LIMBS: usize>(
    x: &FieldElement<LIMBS>,
    y: &FieldElement<LIMBS>,
    params: &CurveParams<LIMBS>,
) -> bool {
    let p = &params.p;
    let y2 = field_sqr(y, p);
    let x2 = field_sqr(x, p);
    let x3 = field_mul(&x2, x, p);
    let ax = field_mul(&params.a, x, p);
    let rhs = field_add(&field_add(&x3, &ax, p), &params.b, p);
    y2 == rhs
}

/// Point doubling in Jacobian coordinates for **any** short Weierstrass curve
/// y^2 = x^3 + a*x + b. Uses the generic "dbl-2007-bl" formula by Bernstein-Lange
/// (cost: 2M + 8S + 1*a-mul + 10add). Works for arbitrary `a`, including a=0
/// (secp256k1) and the random `a` of Brainpool curves.
///
/// Reference: <https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-2007-bl>
///
/// **Constant-time**: no branches on point values. When the input is the
/// point at infinity (`Z1 == 0`), the formulas naturally produce `Z3 == 0`
/// as well (since `Z3 = (Y1+Z1)^2 - YY - ZZ = Y1^2 - Y1^2 - 0 = 0`), so no
/// explicit special-case branch is needed. The output `(X3, Y3, 0)` is a
/// non-canonical but valid Jacobian encoding of the point at infinity.
pub fn point_double<const LIMBS: usize>(
    pt: &JacobianPoint<LIMBS>,
    params: &CurveParams<LIMBS>,
) -> JacobianPoint<LIMBS> {
    let p = &params.p;

    // XX   = X1^2
    // YY   = Y1^2
    // YYYY = YY^2
    // ZZ   = Z1^2
    let xx = field_sqr(&pt.x, p);
    let yy = field_sqr(&pt.y, p);
    let yyyy = field_sqr(&yy, p);
    let zz = field_sqr(&pt.z, p);

    // S = 2*((X1+YY)^2 - XX - YYYY)
    let x_plus_yy = field_add(&pt.x, &yy, p);
    let x_plus_yy_sq = field_sqr(&x_plus_yy, p);
    let s_inner = field_sub(&field_sub(&x_plus_yy_sq, &xx, p), &yyyy, p);
    let s = field_add(&s_inner, &s_inner, p);

    // M = 3*XX + a*ZZ^2
    let two_xx = field_add(&xx, &xx, p);
    let three_xx = field_add(&two_xx, &xx, p);
    let zz_sq = field_sqr(&zz, p);
    let a_zz_sq = field_mul(&params.a, &zz_sq, p);
    let m = field_add(&three_xx, &a_zz_sq, p);

    // T = M^2 - 2*S
    let m_sq = field_sqr(&m, p);
    let two_s = field_add(&s, &s, p);
    let t = field_sub(&m_sq, &two_s, p);

    // X3 = T
    let x3 = t;

    // Y3 = M*(S - T) - 8*YYYY
    let s_minus_t = field_sub(&s, &t, p);
    let m_term = field_mul(&m, &s_minus_t, p);
    let two_yyyy = field_add(&yyyy, &yyyy, p);
    let four_yyyy = field_add(&two_yyyy, &two_yyyy, p);
    let eight_yyyy = field_add(&four_yyyy, &four_yyyy, p);
    let y3 = field_sub(&m_term, &eight_yyyy, p);

    // Z3 = (Y1+Z1)^2 - YY - ZZ
    //    = 0 iff Z1 = 0 (input is infinity), yielding infinity output.
    let y_plus_z = field_add(&pt.y, &pt.z, p);
    let y_plus_z_sq = field_sqr(&y_plus_z, p);
    let z3 = field_sub(&field_sub(&y_plus_z_sq, &yy, p), &zz, p);

    JacobianPoint { x: x3, y: y3, z: z3 }
}

/// Jacobian point addition: `P + Q`. Uses the generic "add-2007-bl" formula
/// (no assumption on Z coordinates).
///
/// **Not constant-time**: branches on whether `P == Q` (to delegate to
/// [`point_double`]) and on whether `P == -Q` (to short-circuit to infinity).
/// Intended for operations on **public** points only, such as the Shamir
/// trick in [`double_scalar_mul`] used by ECDSA verify.
///
/// For the CT-critical scalar multiplication path (signing, ECDH), use
/// `point_add_ct` which is uniform over all P, Q but requires the
/// caller to respect the Montgomery ladder invariant `R1 - R0 == P` so
/// that `P == Q` can never occur (the only case the CT variant cannot
/// handle via the generic formulas alone).
///
/// Reference: <https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-2007-bl>
pub fn point_add<const LIMBS: usize>(
    p_pt: &JacobianPoint<LIMBS>,
    q_pt: &JacobianPoint<LIMBS>,
    params: &CurveParams<LIMBS>,
) -> JacobianPoint<LIMBS> {
    let p = &params.p;

    // Handle identity cases with CT selection at end.
    let p_inf = p_pt.z.is_zero();
    let q_inf = q_pt.z.is_zero();

    // Z1^2, Z1^3
    let z1z1 = field_sqr(&p_pt.z, p);
    let z1z1z1 = field_mul(&z1z1, &p_pt.z, p);

    // Z2^2, Z2^3
    let z2z2 = field_sqr(&q_pt.z, p);
    let z2z2z2 = field_mul(&z2z2, &q_pt.z, p);

    // U1 = X1 * Z2^2, U2 = X2 * Z1^2
    let u1 = field_mul(&p_pt.x, &z2z2, p);
    let u2 = field_mul(&q_pt.x, &z1z1, p);

    // S1 = Y1 * Z2^3, S2 = Y2 * Z1^3
    let s1 = field_mul(&p_pt.y, &z2z2z2, p);
    let s2 = field_mul(&q_pt.y, &z1z1z1, p);

    // H = U2 - U1
    let h = field_sub(&u2, &u1, p);
    // r = S2 - S1
    let r = field_sub(&s2, &s1, p);

    // If H == 0 and r == 0, points are equal -> double.
    let h_zero = h.is_zero();
    let r_zero = r.is_zero();

    if !p_inf && !q_inf && h_zero && r_zero {
        return point_double(p_pt, params);
    }
    // If H == 0 and r != 0, points are inverses -> infinity.
    if !p_inf && !q_inf && h_zero && !r_zero {
        return JacobianPoint::infinity();
    }

    let h2 = field_sqr(&h, p);
    let h3 = field_mul(&h2, &h, p);
    let u1h2 = field_mul(&u1, &h2, p);

    // X3 = r^2 - H^3 - 2*U1*H^2
    let r2 = field_sqr(&r, p);
    let u1h2_2 = field_add(&u1h2, &u1h2, p);
    let x3 = field_sub(&field_sub(&r2, &h3, p), &u1h2_2, p);

    // Y3 = r*(U1*H^2 - X3) - S1*H^3
    let u1h2_minus_x3 = field_sub(&u1h2, &x3, p);
    let r_term = field_mul(&r, &u1h2_minus_x3, p);
    let s1h3 = field_mul(&s1, &h3, p);
    let y3 = field_sub(&r_term, &s1h3, p);

    // Z3 = Z1 * Z2 * H
    let z1z2 = field_mul(&p_pt.z, &q_pt.z, p);
    let z3 = field_mul(&z1z2, &h, p);

    let result = JacobianPoint { x: x3, y: y3, z: z3 };

    // CT select for infinity cases.
    if p_inf {
        *q_pt
    } else if q_inf {
        *p_pt
    } else {
        result
    }
}

/// Constant-time Jacobian point addition for the Montgomery-ladder path.
///
/// Same `add-2007-bl` formula as [`point_add`], but:
/// - No early-return on `P == Q` or `P == -Q`. The ladder invariant
///   `R1 - R0 == P` (with `P != O`) guarantees those inputs never occur
///   with both points finite. When they *do* occur the formulas give
///   `Z3 = 0` (= infinity); for `P == -Q` that is the algebraically
///   correct answer, and for `P == Q` it is wrong (should be `2P`) --
///   but the ladder guarantees this is unreachable.
/// - The `p_inf` / `q_inf` selection at the end uses a branchless
///   [`ct_select_point`] cascade instead of if/else, so the returned
///   control flow is independent of the point values.
///
/// **Callers must ensure `P != Q`** (otherwise a silent wrong result on
/// that single branch). The Montgomery ladder enforces this by
/// construction; do not reuse this function outside that context.
fn point_add_ct<const LIMBS: usize>(
    p_pt: &JacobianPoint<LIMBS>,
    q_pt: &JacobianPoint<LIMBS>,
    params: &CurveParams<LIMBS>,
) -> JacobianPoint<LIMBS> {
    let p = &params.p;

    let p_inf = p_pt.z.is_zero();
    let q_inf = q_pt.z.is_zero();

    let z1z1 = field_sqr(&p_pt.z, p);
    let z1z1z1 = field_mul(&z1z1, &p_pt.z, p);
    let z2z2 = field_sqr(&q_pt.z, p);
    let z2z2z2 = field_mul(&z2z2, &q_pt.z, p);

    let u1 = field_mul(&p_pt.x, &z2z2, p);
    let u2 = field_mul(&q_pt.x, &z1z1, p);
    let s1 = field_mul(&p_pt.y, &z2z2z2, p);
    let s2 = field_mul(&q_pt.y, &z1z1z1, p);

    let h = field_sub(&u2, &u1, p);
    let r = field_sub(&s2, &s1, p);

    let h2 = field_sqr(&h, p);
    let h3 = field_mul(&h2, &h, p);
    let u1h2 = field_mul(&u1, &h2, p);

    let r2 = field_sqr(&r, p);
    let u1h2_2 = field_add(&u1h2, &u1h2, p);
    let x3 = field_sub(&field_sub(&r2, &h3, p), &u1h2_2, p);

    let u1h2_minus_x3 = field_sub(&u1h2, &x3, p);
    let r_term = field_mul(&r, &u1h2_minus_x3, p);
    let s1h3 = field_mul(&s1, &h3, p);
    let y3 = field_sub(&r_term, &s1h3, p);

    let z1z2 = field_mul(&p_pt.z, &q_pt.z, p);
    let z3 = field_mul(&z1z2, &h, p);

    let general = JacobianPoint { x: x3, y: y3, z: z3 };

    // Branchless select cascade:
    //   q_inf => p_pt, else p_inf => q_pt, else general.
    // When both are infinity the p_inf override wins, producing q_pt, which
    // is itself infinity -- correct.
    let mut out = general;
    out = ct_select_point(p_pt, &out, q_inf as u64);
    out = ct_select_point(q_pt, &out, p_inf as u64);
    out
}

/// Constant-time conditional select of a Jacobian point.
///
/// Returns `a` if `condition != 0`, else `b`. No branch, no data-dependent
/// memory access. `condition` is expected to be 0 or 1 (the caller enforces
/// this by deriving it from `bool as u64` or a masked scalar bit).
fn ct_select_point<const LIMBS: usize>(
    a: &JacobianPoint<LIMBS>,
    b: &JacobianPoint<LIMBS>,
    condition: u64,
) -> JacobianPoint<LIMBS> {
    let mask = 0u64.wrapping_sub(condition);
    let inv = !mask;
    let mut out = JacobianPoint {
        x: FieldElement { limbs: [0u64; LIMBS] },
        y: FieldElement { limbs: [0u64; LIMBS] },
        z: FieldElement { limbs: [0u64; LIMBS] },
    };
    for i in 0..LIMBS {
        out.x.limbs[i] = (a.x.limbs[i] & mask) | (b.x.limbs[i] & inv);
        out.y.limbs[i] = (a.y.limbs[i] & mask) | (b.y.limbs[i] & inv);
        out.z.limbs[i] = (a.z.limbs[i] & mask) | (b.z.limbs[i] & inv);
    }
    out
}

/// Scalar multiplication using the **constant-time Montgomery ladder**.
/// Computes `k * P` for a scalar `k` and a non-infinity point `P`.
///
/// # Constant-time properties
///
/// - Fixed iteration count `LIMBS * 64` -- independent of `k`.
/// - Each iteration performs exactly one `ct_swap`, one `point_add_ct`
///   and one [`point_double`], in that order. No branch depends on any
///   scalar bit beyond the `ct_swap` mask.
/// - [`point_double`] and `point_add_ct` themselves are uniform: they
///   always compute the generic formulas and then apply branchless
///   selects for the `Z == 0` (infinity) edge cases that occur during
///   the leading-zero bits of `k`.
///
/// # Ladder invariant
///
/// At every step of the scan, `R1 - R0 == P`. This guarantees that
/// `point_add_ct` is never called with `R0 == R1` (which would require
/// `P == O`; `P` is assumed non-infinity). The `R0 == -R1` case is
/// algebraically valid (the formulas give `Z3 = 0` = infinity) and
/// therefore handled with no special-casing.
pub fn scalar_mul_point<const LIMBS: usize>(
    k: &FieldElement<LIMBS>,
    point: &JacobianPoint<LIMBS>,
    params: &CurveParams<LIMBS>,
) -> JacobianPoint<LIMBS> {
    let mut r0 = JacobianPoint::infinity();
    let mut r1 = *point;

    let total_bits = LIMBS * 64;
    for i in (0..total_bits).rev() {
        let limb_idx = i / 64;
        let bit_idx = i % 64;
        let bit = (k.limbs[limb_idx] >> bit_idx) & 1;

        ct_swap(&mut r0, &mut r1, bit);
        r1 = point_add_ct(&r0, &r1, params);
        r0 = point_double(&r0, params);
        ct_swap(&mut r0, &mut r1, bit);
    }

    r0
}

/// Constant-time conditional swap of two points.
fn ct_swap<const LIMBS: usize>(a: &mut JacobianPoint<LIMBS>, b: &mut JacobianPoint<LIMBS>, condition: u64) {
    let mask = 0u64.wrapping_sub(condition);
    for i in 0..LIMBS {
        let t = mask & (a.x.limbs[i] ^ b.x.limbs[i]);
        a.x.limbs[i] ^= t;
        b.x.limbs[i] ^= t;

        let t = mask & (a.y.limbs[i] ^ b.y.limbs[i]);
        a.y.limbs[i] ^= t;
        b.y.limbs[i] ^= t;

        let t = mask & (a.z.limbs[i] ^ b.z.limbs[i]);
        a.z.limbs[i] ^= t;
        b.z.limbs[i] ^= t;
    }
}

/// Compute k1*G + k2*Q (used in ECDSA verify).
/// Not constant-time in the public values (signature verification is public).
pub fn double_scalar_mul<const LIMBS: usize>(
    k1: &FieldElement<LIMBS>,
    g: &JacobianPoint<LIMBS>,
    k2: &FieldElement<LIMBS>,
    q: &JacobianPoint<LIMBS>,
    params: &CurveParams<LIMBS>,
) -> JacobianPoint<LIMBS> {
    // Shamir's trick: scan bits of k1 and k2 simultaneously.
    let total_bits = LIMBS * 64;
    let mut result = JacobianPoint::infinity();

    // Precompute: G, Q, G+Q
    let g_plus_q = point_add(g, q, params);

    for i in (0..total_bits).rev() {
        result = point_double(&result, params);

        let limb_idx = i / 64;
        let bit_idx = i % 64;
        let b1 = (k1.limbs[limb_idx] >> bit_idx) & 1;
        let b2 = (k2.limbs[limb_idx] >> bit_idx) & 1;

        let to_add = match (b1, b2) {
            (1, 1) => Some(&g_plus_q),
            (1, 0) => Some(g),
            (0, 1) => Some(q),
            _ => None,
        };

        if let Some(pt) = to_add {
            result = point_add(&result, pt, params);
        }
    }

    result
}

#[cfg(test)]
mod tests {
    use super::*;

    fn hex_to_bytes(hex: &str) -> Vec<u8> {
        (0..hex.len())
            .step_by(2)
            .map(|i| u8::from_str_radix(&hex[i..i + 2], 16).unwrap())
            .collect()
    }

    #[test]
    fn test_p256_generator_on_curve() {
        let params = p256_params();
        let p = &params.p;

        // Verify y^2 = x^3 + ax + b for the generator.
        let x = &params.gx;
        let y = &params.gy;
        let y2 = field_sqr(y, p);

        let x2 = field_sqr(x, p);
        let x3 = field_mul(&x2, x, p);
        let ax = field_mul(&params.a, x, p);
        let rhs = field_add(&field_add(&x3, &ax, p), &params.b, p);

        assert_eq!(y2, rhs, "Generator point not on curve");
    }

    #[test]
    fn test_p256_scalar_mul_generator() {
        let params = p256_params();
        let g = JacobianPoint::from_affine(params.gx, params.gy);

        // 1*G should equal G.
        let one = FieldElement::<4>::one();
        let result = scalar_mul_point(&one, &g, &params);
        let (rx, ry) = result.to_affine(&params.p).unwrap();
        assert_eq!(rx, params.gx);
        assert_eq!(ry, params.gy);
    }

    #[test]
    fn test_p256_double_generator() {
        let params = p256_params();
        let g = JacobianPoint::from_affine(params.gx, params.gy);

        // 2*G via scalar_mul.
        let two = FieldElement::<4>::from_bytes_be(&[2]);
        let result = scalar_mul_point(&two, &g, &params);
        let (rx, _ry) = result.to_affine(&params.p).unwrap();

        // 2*G via point_double.
        let doubled = point_double(&g, &params);
        let (dx, _dy) = doubled.to_affine(&params.p).unwrap();

        assert_eq!(rx, dx);
    }

    #[test]
    fn test_p256_n_times_g_is_infinity() {
        let params = p256_params();
        let g = JacobianPoint::from_affine(params.gx, params.gy);

        // n*G should be the point at infinity.
        let n = FieldElement::<4> { limbs: params.n };
        let result = scalar_mul_point(&n, &g, &params);
        assert!(result.is_infinity(), "n*G should be infinity");
    }

    /// Generic check: verify y^2 == x^3 + a*x + b for the generator.
    fn check_generator_on_curve<const LIMBS: usize>(params: &CurveParams<LIMBS>) {
        assert!(is_on_curve(&params.gx, &params.gy, params), "Generator not on curve");
    }

    /// Generic check: n*G must be the point at infinity. This is the strongest
    /// quick sanity check on the (p, a, b, G, n) tuple together.
    fn check_n_times_g_is_infinity<const LIMBS: usize>(params: &CurveParams<LIMBS>) {
        let g = JacobianPoint::from_affine(params.gx, params.gy);
        let n = FieldElement::<LIMBS> { limbs: params.n };
        let result = scalar_mul_point(&n, &g, params);
        assert!(result.is_infinity(), "n*G is not infinity");
    }

    #[test]
    fn test_p384_generator_on_curve() {
        check_generator_on_curve(&p384_params());
    }

    #[test]
    fn test_p384_n_times_g_is_infinity() {
        check_n_times_g_is_infinity(&p384_params());
    }

    #[test]
    fn test_secp256k1_generator_on_curve() {
        check_generator_on_curve(&secp256k1_params());
    }

    #[test]
    fn test_secp256k1_n_times_g_is_infinity() {
        check_n_times_g_is_infinity(&secp256k1_params());
    }

    #[test]
    fn test_brainpoolp256r1_generator_on_curve() {
        check_generator_on_curve(&brainpoolp256r1_params());
    }

    #[test]
    fn test_brainpoolp256r1_n_times_g_is_infinity() {
        check_n_times_g_is_infinity(&brainpoolp256r1_params());
    }

    #[test]
    fn test_brainpoolp384r1_generator_on_curve() {
        check_generator_on_curve(&brainpoolp384r1_params());
    }

    #[test]
    fn test_brainpoolp384r1_n_times_g_is_infinity() {
        check_n_times_g_is_infinity(&brainpoolp384r1_params());
    }

    #[test]
    fn test_brainpoolp512r1_generator_on_curve() {
        check_generator_on_curve(&brainpoolp512r1_params());
    }

    #[test]
    fn test_brainpoolp512r1_n_times_g_is_infinity() {
        check_n_times_g_is_infinity(&brainpoolp512r1_params());
    }

    #[test]
    fn test_secp521r1_generator_on_curve() {
        check_generator_on_curve(&secp521r1_params());
    }

    #[test]
    fn test_secp521r1_n_times_g_is_infinity() {
        check_n_times_g_is_infinity(&secp521r1_params());
    }

    #[test]
    fn test_p256_known_2g() {
        // Known value of 2*G for P-256.
        let params = p256_params();
        let g = JacobianPoint::from_affine(params.gx, params.gy);

        let two = FieldElement::<4>::from_bytes_be(&[2]);
        let result = scalar_mul_point(&two, &g, &params);
        let (rx, ry) = result.to_affine(&params.p).unwrap();

        let expected_x = hex_to_bytes("7CF27B188D034F7E8A52380304B51AC3C08969E277F21B35A60B48FC47669978");
        let expected_y = hex_to_bytes("07775510DB8ED040293D9AC69F7430DBBA7DADE63CE982299E04B79D227873D1");

        assert_eq!(rx.to_bytes_be(), expected_x);
        assert_eq!(ry.to_bytes_be(), expected_y);
    }
}