dcrypt_algorithms/ec/p256/
field.rs

1//! P-256 field arithmetic implementation
2
3use crate::ec::p256::constants::P256_FIELD_ELEMENT_SIZE;
4use crate::error::{Error, Result};
5use subtle::{Choice, ConditionallySelectable};
6
7/// P-256 field element representing values in F_p
8///
9/// Internally stored as 8 little-endian 32-bit limbs for efficient arithmetic.
10/// All operations maintain the invariant that values are reduced modulo p.
11#[derive(Clone, Debug, PartialEq, Eq)]
12pub struct FieldElement(pub(crate) [u32; 8]);
13
14impl FieldElement {
15    /* -------------------------------------------------------------------- */
16    /*  NIST P-256 Field Constants (stored as little-endian 32-bit limbs)  */
17    /* -------------------------------------------------------------------- */
18
19    /// The NIST P-256 prime modulus: p = 2^256 - 2^224 + 2^192 + 2^96 - 1
20    /// Stored as 8 little-endian 32-bit limbs where limbs[0] is least significant
21    pub(crate) const MOD_LIMBS: [u32; 8] = [
22        0xFFFF_FFFF, // 2⁰ … 2³¹
23        0xFFFF_FFFF, // 2³² … 2⁶³
24        0xFFFF_FFFF, // 2⁶⁴ … 2⁹⁵
25        0x0000_0000, // 2⁹⁶ … 2¹²⁷
26        0x0000_0000, // 2¹²⁸ … 2¹⁵⁹
27        0x0000_0000, // 2¹⁶⁰ … 2¹⁹¹
28        0x0000_0001, // 2¹⁹² … 2²²³
29        0xFFFF_FFFF, // 2²²⁴ … 2²⁵⁵
30    ];
31
32    /// The curve parameter a = -3 mod p, used in the curve equation y² = x³ + ax + b
33    /// For P-256: a = p - 3
34    pub(crate) const A_M3: [u32; 8] = [
35        0xFFFF_FFFC, // (2³² - 1) - 3 = 2³² - 4
36        0xFFFF_FFFF,
37        0xFFFF_FFFF,
38        0x0000_0000,
39        0x0000_0000,
40        0x0000_0000,
41        0x0000_0001,
42        0xFFFF_FFFF, // Most significant limb
43    ];
44
45    /// The additive identity element: 0
46    pub fn zero() -> Self {
47        FieldElement([0, 0, 0, 0, 0, 0, 0, 0])
48    }
49
50    /// The multiplicative identity element: 1
51    pub fn one() -> Self {
52        FieldElement([1, 0, 0, 0, 0, 0, 0, 0])
53    }
54
55    /// Create a field element from big-endian byte representation
56    ///
57    /// Validates that the input represents a value less than the field modulus p.
58    /// Returns an error if the value is >= p.
59    pub fn from_bytes(bytes: &[u8; P256_FIELD_ELEMENT_SIZE]) -> Result<Self> {
60        let mut limbs = [0u32; 8];
61
62        // Convert from big-endian bytes to little-endian limbs
63        // limbs[0] = least-significant 4 bytes (bytes[28..32])
64        // limbs[7] = most-significant 4 bytes (bytes[0..4])
65        #[allow(clippy::needless_range_loop)] // Index used for offset calculation
66        for i in 0..8 {
67            let offset = (7 - i) * 4; // Byte offset: 28, 24, 20, ..., 0
68            limbs[i] = u32::from_be_bytes([
69                bytes[offset],
70                bytes[offset + 1],
71                bytes[offset + 2],
72                bytes[offset + 3],
73            ]);
74        }
75
76        // Validate that the value is in the field (< p)
77        let fe = FieldElement(limbs);
78        if !fe.is_valid() {
79            return Err(Error::param(
80                "FieldElement",
81                "Value must be less than the field modulus",
82            ));
83        }
84
85        Ok(fe)
86    }
87
88    /// Convert field element to big-endian byte representation
89    pub fn to_bytes(&self) -> [u8; P256_FIELD_ELEMENT_SIZE] {
90        let mut bytes = [0u8; P256_FIELD_ELEMENT_SIZE];
91
92        // Convert from little-endian limbs to big-endian bytes
93        for i in 0..8 {
94            let limb_bytes = self.0[i].to_be_bytes();
95            let offset = (7 - i) * 4; // Byte offset: 28, 24, 20, ..., 0
96            bytes[offset..offset + 4].copy_from_slice(&limb_bytes);
97        }
98        bytes
99    }
100
101    /// Constant-time validation that the field element is in canonical form (< p)
102    ///
103    /// Uses constant-time subtraction to check if self < p without branching.
104    /// Returns true if the element is valid (< p), false otherwise.
105    #[inline(always)]
106    pub fn is_valid(&self) -> bool {
107        // Attempt to subtract p from self
108        // If subtraction requires a borrow, then self < p (valid)
109        let (_, borrow) = Self::sbb8(self.0, Self::MOD_LIMBS);
110        borrow == 1
111    }
112
113    /// Constant-time field addition: (self + other) mod p
114    ///
115    /// Algorithm:
116    /// 1. Perform full 256-bit addition with carry detection
117    /// 2. Conditionally subtract p if result >= p
118    /// 3. Ensure result is in canonical form
119    #[inline(always)]
120    pub fn add(&self, other: &Self) -> Self {
121        // Step 1: Full 256-bit addition
122        let (sum, carry) = Self::adc8(self.0, other.0);
123
124        // Step 2: Attempt conditional reduction by subtracting p
125        let (sum_minus_p, borrow) = Self::sbb8(sum, Self::MOD_LIMBS);
126
127        // Step 3: Choose reduced value if:
128        //   - Addition overflowed (carry == 1), OR
129        //   - Subtraction didn't borrow (borrow == 0), meaning sum >= p
130        let need_reduce = (carry | (borrow ^ 1)) & 1;
131        let reduced = Self::conditional_select(&sum, &sum_minus_p, Choice::from(need_reduce as u8));
132
133        // Step 4: Final canonical reduction
134        reduced.conditional_sub_p()
135    }
136
137    /// Constant-time field subtraction: (self - other) mod p
138    ///
139    /// Algorithm:
140    /// 1. Perform limb-wise subtraction
141    /// 2. If subtraction borrows, add p to get the correct positive result
142    pub fn sub(&self, other: &Self) -> Self {
143        // Step 1: Raw subtraction
144        let (diff, borrow) = Self::sbb8(self.0, other.0);
145
146        // Step 2: If we borrowed, add p to get the correct positive result
147        let (candidate, _) = Self::adc8(diff, Self::MOD_LIMBS);
148
149        // Step 3: Constant-time select based on borrow flag
150        Self::conditional_select(&diff, &candidate, Choice::from(borrow as u8))
151    }
152
153    /// Field multiplication: (self * other) mod p
154    ///
155    /// Algorithm:
156    /// 1. Compute the full 512-bit product using schoolbook multiplication
157    /// 2. Perform carry propagation to get proper limb representation
158    /// 3. Apply NIST P-256 specific fast reduction (Solinas method)
159    ///
160    /// The multiplication is performed in three phases to maintain clarity
161    /// and correctness while achieving good performance.
162    pub fn mul(&self, other: &Self) -> Self {
163        // Phase 1: Accumulate partial products in 128-bit temporaries
164        // This prevents overflow during the schoolbook multiplication
165        let mut t = [0u128; 16];
166        for i in 0..8 {
167            for j in 0..8 {
168                t[i + j] += (self.0[i] as u128) * (other.0[j] as u128);
169            }
170        }
171
172        // Phase 2: Carry propagation to convert to 32-bit limb representation
173        let mut prod = [0u32; 16];
174        let mut carry: u128 = 0;
175        for i in 0..16 {
176            let v = t[i] + carry;
177            prod[i] = (v & 0xffff_ffff) as u32;
178            carry = v >> 32;
179        }
180
181        // Phase 3: Apply NIST P-256 fast reduction
182        Self::reduce_wide(prod)
183    }
184
185    /// Field squaring: self² mod p
186    ///
187    /// Optimized version of multiplication for the case where both operands
188    /// are the same. Currently implemented as self.mul(self) but could be
189    /// optimized further with dedicated squaring algorithms.
190    #[inline(always)]
191    pub fn square(&self) -> Self {
192        self.mul(self)
193    }
194
195    /// Compute the modular multiplicative inverse using Fermat's Little Theorem
196    ///
197    /// For prime fields, a^(p-1) ≡ 1 (mod p), so a^(p-2) ≡ a^(-1) (mod p).
198    /// Uses binary exponentiation (square-and-multiply) for efficiency.
199    ///
200    /// Returns an error if attempting to invert zero (which has no inverse).
201    pub fn invert(&self) -> Result<Self> {
202        if self.is_zero() {
203            return Err(Error::param(
204                "FieldElement",
205                "Inversion of zero is undefined",
206            ));
207        }
208
209        // The exponent p-2 for NIST P-256 in big-endian byte format
210        const P_MINUS_2: [u8; 32] = [
211            0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
212            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
213            0xFF, 0xFF, 0xFF, 0xFD,
214        ];
215
216        // Binary exponentiation: compute self^(p-2) mod p
217        let mut result = FieldElement::one();
218        let mut base = self.clone();
219
220        // Process each bit of the exponent from least to most significant
221        for &byte in P_MINUS_2.iter().rev() {
222            for bit in 0..8 {
223                if (byte >> bit) & 1 == 1 {
224                    result = result.mul(&base);
225                }
226                base = base.square();
227            }
228        }
229
230        Ok(result)
231    }
232
233    /// Check if the field element represents zero
234    ///
235    /// Constant-time check across all limbs to determine if the
236    /// field element is the additive identity.
237    pub fn is_zero(&self) -> bool {
238        for limb in self.0.iter() {
239            if *limb != 0 {
240                return false;
241            }
242        }
243        true
244    }
245
246    /// Return `true` if the field element is odd (least-significant bit set)
247    ///
248    /// Used for point compression to determine the sign of the y-coordinate.
249    /// The parity is determined by the least significant bit of the canonical
250    /// representation.
251    pub fn is_odd(&self) -> bool {
252        (self.0[0] & 1) == 1
253    }
254
255    /// Compute modular square root using exponentiation.
256    ///
257    /// Because the P-256 prime satisfies p ≡ 3 (mod 4), we can compute
258    /// sqrt(a) = a^((p+1)/4) mod p. This is more efficient than the
259    /// general Tonelli-Shanks algorithm.
260    ///
261    /// Returns `None` when the input is a quadratic non-residue (i.e.,
262    /// when no square root exists in the field).
263    ///
264    /// # Algorithm
265    /// For p ≡ 3 (mod 4), if a has a square root, then:
266    /// - sqrt(a) = ±a^((p+1)/4) mod p
267    /// - We return the principal square root (the smaller of the two)
268    pub fn sqrt(&self) -> Option<Self> {
269        if self.is_zero() {
270            return Some(Self::zero());
271        }
272
273        // (p + 1) / 4 for P-256 as big-endian bytes
274        // p = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff
275        // (p + 1) / 4 = 0x3fffffffc0000000400000000000000000000000400000000000000000000000
276        const EXP: [u8; 32] = [
277            0x3F, 0xFF, 0xFF, 0xFF, 0xC0, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00,
278            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
279            0x00, 0x00, 0x00, 0x00,
280        ];
281
282        let mut result = FieldElement::one();
283        let mut base = self.clone();
284
285        // Binary exponentiation from LSB to MSB
286        for &byte in EXP.iter().rev() {
287            for bit in 0..8 {
288                if ((byte >> bit) & 1) == 1 {
289                    result = result.mul(&base);
290                }
291                base = base.square();
292            }
293        }
294
295        // Verify that result^2 = self (constant-time check)
296        if result.square() == *self {
297            Some(result)
298        } else {
299            None
300        }
301    }
302
303    // Private helper methods
304
305    /// Constant-time conditional selection between two limb arrays
306    ///
307    /// Returns a if flag == 0, returns b if flag == 1
308    /// Used for branchless operations to maintain constant-time guarantees.
309    fn conditional_select(a: &[u32; 8], b: &[u32; 8], flag: Choice) -> Self {
310        let mut out = [0u32; 8];
311        for i in 0..8 {
312            out[i] = u32::conditional_select(&a[i], &b[i], flag);
313        }
314        FieldElement(out)
315    }
316
317    /// 8-limb addition with carry propagation
318    ///
319    /// Performs full-width addition across all limbs, returning both
320    /// the sum and the final carry bit for overflow detection.
321    #[inline(always)]
322    fn adc8(a: [u32; 8], b: [u32; 8]) -> ([u32; 8], u32) {
323        let mut r = [0u32; 8];
324        let mut carry = 0;
325
326        #[allow(clippy::needless_range_loop)] // Index used for multiple arrays
327        for i in 0..8 {
328            // Add corresponding limbs plus carry from previous iteration
329            let (sum1, carry1) = a[i].overflowing_add(b[i]);
330            let (sum2, carry2) = sum1.overflowing_add(carry);
331
332            r[i] = sum2;
333            carry = (carry1 as u32) | (carry2 as u32);
334        }
335
336        (r, carry)
337    }
338
339    /// 8-limb subtraction with borrow propagation
340    ///
341    /// Performs full-width subtraction across all limbs, returning both
342    /// the difference and the final borrow bit for underflow detection.
343    #[inline(always)]
344    fn sbb8(a: [u32; 8], b: [u32; 8]) -> ([u32; 8], u32) {
345        let mut r = [0u32; 8];
346        let mut borrow = 0;
347
348        #[allow(clippy::needless_range_loop)] // Index used for multiple arrays
349        for i in 0..8 {
350            // Subtract corresponding limbs minus borrow from previous iteration
351            let (diff1, borrow1) = a[i].overflowing_sub(b[i]);
352            let (diff2, borrow2) = diff1.overflowing_sub(borrow);
353
354            r[i] = diff2;
355            borrow = (borrow1 as u32) | (borrow2 as u32);
356        }
357        (r, borrow)
358    }
359
360    /// Conditionally subtract p if the current value is >= p
361    ///
362    /// Ensures the field element is in canonical reduced form.
363    /// Used as a final step in arithmetic operations.
364    fn conditional_sub_p(&self) -> Self {
365        let needs_sub = Choice::from((!self.is_valid() as u8) & 1);
366        Self::conditional_sub(self.0, needs_sub)
367    }
368
369    /// Conditionally subtract the field modulus p based on a boolean condition
370    ///
371    /// Uses constant-time selection to avoid branching while maintaining
372    /// the option to perform the subtraction.
373    fn conditional_sub(limbs: [u32; 8], condition: Choice) -> Self {
374        let mut result = [0u32; 8];
375        let (diff, _) = Self::sbb8(limbs, Self::MOD_LIMBS);
376
377        // Constant-time select between original limbs and difference
378        for i in 0..8 {
379            result[i] = u32::conditional_select(&limbs[i], &diff[i], condition);
380        }
381
382        Self(result)
383    }
384
385    /// NIST P-256 specific reduction for 512-bit values using Solinas method
386    /// Fully constant-time Solinas reduction with two carry-folds.
387    pub(crate) fn reduce_wide(t: [u32; 16]) -> FieldElement {
388        // 1) load into signed 128-bit
389        let mut s = [0i128; 16];
390        for (i, &val) in t.iter().enumerate() {
391            s[i] = val as i128;
392        }
393
394        // 2) fold high limbs 8..15 into 0..7 via
395        //    2^256 ≡ 2^224 − 2^192 − 2^96 + 1
396        for i in (8..16).rev() {
397            let v = s[i];
398            s[i] = 0;
399            s[i - 8] = s[i - 8].wrapping_add(v); // +2^0
400            s[i - 5] = s[i - 5].wrapping_sub(v); // -2^96
401            s[i - 2] = s[i - 2].wrapping_sub(v); // -2^192
402            s[i - 1] = s[i - 1].wrapping_add(v); // +2^224
403        }
404
405        // 3) first signed carry-propagate
406        let mut carry1: i128 = 0;
407        for val in s.iter_mut().take(8) {
408            let tmp = *val + carry1;
409            *val = tmp & 0xffff_ffff;
410            carry1 = tmp >> 32; // arithmetic shift
411        }
412
413        // 4) fold carry1 back down (correct indices: 3 & 6)
414        let c1 = carry1;
415        s[0] = s[0].wrapping_add(c1); // +2^0
416        s[3] = s[3].wrapping_sub(c1); // -2^96
417        s[6] = s[6].wrapping_sub(c1); // -2^192
418        s[7] = s[7].wrapping_add(c1); // +2^224
419
420        // 5) second signed carry-propagate
421        let mut carry2: i128 = 0;
422        for val in s.iter_mut().take(8) {
423            let tmp = *val + carry2;
424            *val = tmp & 0xffff_ffff;
425            carry2 = tmp >> 32;
426        }
427
428        // 6) fold carry2 back down (correct indices: 3 & 6)
429        let c2 = carry2;
430        s[0] = s[0].wrapping_add(c2);
431        s[3] = s[3].wrapping_sub(c2);
432        s[6] = s[6].wrapping_sub(c2);
433        s[7] = s[7].wrapping_add(c2);
434
435        // 7) final signed carry-propagate into 32-bit limbs
436        let mut out = [0u32; 8];
437        let mut carry3: i128 = 0;
438        for (i, val) in s.iter().take(8).enumerate() {
439            let tmp = *val + carry3;
440            out[i] = (tmp & 0xffff_ffff) as u32;
441            carry3 = tmp >> 32;
442        }
443
444        // 8) one last constant-time subtract if ≥ p
445        let (subbed, borrow) = Self::sbb8(out, Self::MOD_LIMBS);
446        let need_sub = Choice::from((borrow ^ 1) as u8); // borrow==0 ⇒ out>=p
447        Self::conditional_select(&out, &subbed, need_sub)
448    }
449
450    /// Get the field modulus p as a FieldElement
451    ///
452    /// Returns the NIST P-256 prime modulus for use in reduction operations.
453    pub(crate) fn get_modulus() -> Self {
454        FieldElement(Self::MOD_LIMBS)
455    }
456}