dcrypt_algorithms/ec/p256/field.rs
1//! P-256 field arithmetic implementation
2
3use crate::ec::p256::constants::P256_FIELD_ELEMENT_SIZE;
4use crate::error::{Error, Result};
5use subtle::{Choice, ConditionallySelectable};
6
7/// P-256 field element representing values in F_p
8///
9/// Internally stored as 8 little-endian 32-bit limbs for efficient arithmetic.
10/// All operations maintain the invariant that values are reduced modulo p.
11#[derive(Clone, Debug, PartialEq, Eq)]
12pub struct FieldElement(pub(crate) [u32; 8]);
13
14impl FieldElement {
15 /* -------------------------------------------------------------------- */
16 /* NIST P-256 Field Constants (stored as little-endian 32-bit limbs) */
17 /* -------------------------------------------------------------------- */
18
19 /// The NIST P-256 prime modulus: p = 2^256 - 2^224 + 2^192 + 2^96 - 1
20 /// Stored as 8 little-endian 32-bit limbs where limbs[0] is least significant
21 pub(crate) const MOD_LIMBS: [u32; 8] = [
22 0xFFFF_FFFF, // 2⁰ … 2³¹
23 0xFFFF_FFFF, // 2³² … 2⁶³
24 0xFFFF_FFFF, // 2⁶⁴ … 2⁹⁵
25 0x0000_0000, // 2⁹⁶ … 2¹²⁷
26 0x0000_0000, // 2¹²⁸ … 2¹⁵⁹
27 0x0000_0000, // 2¹⁶⁰ … 2¹⁹¹
28 0x0000_0001, // 2¹⁹² … 2²²³
29 0xFFFF_FFFF, // 2²²⁴ … 2²⁵⁵
30 ];
31
32 /// The curve parameter a = -3 mod p, used in the curve equation y² = x³ + ax + b
33 /// For P-256: a = p - 3
34 pub(crate) const A_M3: [u32; 8] = [
35 0xFFFF_FFFC, // (2³² - 1) - 3 = 2³² - 4
36 0xFFFF_FFFF,
37 0xFFFF_FFFF,
38 0x0000_0000,
39 0x0000_0000,
40 0x0000_0000,
41 0x0000_0001,
42 0xFFFF_FFFF, // Most significant limb
43 ];
44
45 /// The additive identity element: 0
46 pub fn zero() -> Self {
47 FieldElement([0, 0, 0, 0, 0, 0, 0, 0])
48 }
49
50 /// The multiplicative identity element: 1
51 pub fn one() -> Self {
52 FieldElement([1, 0, 0, 0, 0, 0, 0, 0])
53 }
54
55 /// Create a field element from big-endian byte representation
56 ///
57 /// Validates that the input represents a value less than the field modulus p.
58 /// Returns an error if the value is >= p.
59 pub fn from_bytes(bytes: &[u8; P256_FIELD_ELEMENT_SIZE]) -> Result<Self> {
60 let mut limbs = [0u32; 8];
61
62 // Convert from big-endian bytes to little-endian limbs
63 // limbs[0] = least-significant 4 bytes (bytes[28..32])
64 // limbs[7] = most-significant 4 bytes (bytes[0..4])
65 #[allow(clippy::needless_range_loop)] // Index used for offset calculation
66 for i in 0..8 {
67 let offset = (7 - i) * 4; // Byte offset: 28, 24, 20, ..., 0
68 limbs[i] = u32::from_be_bytes([
69 bytes[offset],
70 bytes[offset + 1],
71 bytes[offset + 2],
72 bytes[offset + 3],
73 ]);
74 }
75
76 // Validate that the value is in the field (< p)
77 let fe = FieldElement(limbs);
78 if !fe.is_valid() {
79 return Err(Error::param(
80 "FieldElement",
81 "Value must be less than the field modulus",
82 ));
83 }
84
85 Ok(fe)
86 }
87
88 /// Convert field element to big-endian byte representation
89 pub fn to_bytes(&self) -> [u8; P256_FIELD_ELEMENT_SIZE] {
90 let mut bytes = [0u8; P256_FIELD_ELEMENT_SIZE];
91
92 // Convert from little-endian limbs to big-endian bytes
93 for i in 0..8 {
94 let limb_bytes = self.0[i].to_be_bytes();
95 let offset = (7 - i) * 4; // Byte offset: 28, 24, 20, ..., 0
96 bytes[offset..offset + 4].copy_from_slice(&limb_bytes);
97 }
98 bytes
99 }
100
101 /// Constant-time validation that the field element is in canonical form (< p)
102 ///
103 /// Uses constant-time subtraction to check if self < p without branching.
104 /// Returns true if the element is valid (< p), false otherwise.
105 #[inline(always)]
106 pub fn is_valid(&self) -> bool {
107 // Attempt to subtract p from self
108 // If subtraction requires a borrow, then self < p (valid)
109 let (_, borrow) = Self::sbb8(self.0, Self::MOD_LIMBS);
110 borrow == 1
111 }
112
113 /// Constant-time field addition: (self + other) mod p
114 ///
115 /// Algorithm:
116 /// 1. Perform full 256-bit addition with carry detection
117 /// 2. Conditionally subtract p if result >= p
118 /// 3. Ensure result is in canonical form
119 #[inline(always)]
120 pub fn add(&self, other: &Self) -> Self {
121 // Step 1: Full 256-bit addition
122 let (sum, carry) = Self::adc8(self.0, other.0);
123
124 // Step 2: Attempt conditional reduction by subtracting p
125 let (sum_minus_p, borrow) = Self::sbb8(sum, Self::MOD_LIMBS);
126
127 // Step 3: Choose reduced value if:
128 // - Addition overflowed (carry == 1), OR
129 // - Subtraction didn't borrow (borrow == 0), meaning sum >= p
130 let need_reduce = (carry | (borrow ^ 1)) & 1;
131 let reduced = Self::conditional_select(&sum, &sum_minus_p, Choice::from(need_reduce as u8));
132
133 // Step 4: Final canonical reduction
134 reduced.conditional_sub_p()
135 }
136
137 /// Constant-time field subtraction: (self - other) mod p
138 ///
139 /// Algorithm:
140 /// 1. Perform limb-wise subtraction
141 /// 2. If subtraction borrows, add p to get the correct positive result
142 pub fn sub(&self, other: &Self) -> Self {
143 // Step 1: Raw subtraction
144 let (diff, borrow) = Self::sbb8(self.0, other.0);
145
146 // Step 2: If we borrowed, add p to get the correct positive result
147 let (candidate, _) = Self::adc8(diff, Self::MOD_LIMBS);
148
149 // Step 3: Constant-time select based on borrow flag
150 Self::conditional_select(&diff, &candidate, Choice::from(borrow as u8))
151 }
152
153 /// Field multiplication: (self * other) mod p
154 ///
155 /// Algorithm:
156 /// 1. Compute the full 512-bit product using schoolbook multiplication
157 /// 2. Perform carry propagation to get proper limb representation
158 /// 3. Apply NIST P-256 specific fast reduction (Solinas method)
159 ///
160 /// The multiplication is performed in three phases to maintain clarity
161 /// and correctness while achieving good performance.
162 pub fn mul(&self, other: &Self) -> Self {
163 // Phase 1: Accumulate partial products in 128-bit temporaries
164 // This prevents overflow during the schoolbook multiplication
165 let mut t = [0u128; 16];
166 for i in 0..8 {
167 for j in 0..8 {
168 t[i + j] += (self.0[i] as u128) * (other.0[j] as u128);
169 }
170 }
171
172 // Phase 2: Carry propagation to convert to 32-bit limb representation
173 let mut prod = [0u32; 16];
174 let mut carry: u128 = 0;
175 for i in 0..16 {
176 let v = t[i] + carry;
177 prod[i] = (v & 0xffff_ffff) as u32;
178 carry = v >> 32;
179 }
180
181 // Phase 3: Apply NIST P-256 fast reduction
182 Self::reduce_wide(prod)
183 }
184
185 /// Field squaring: self² mod p
186 ///
187 /// Optimized version of multiplication for the case where both operands
188 /// are the same. Currently implemented as self.mul(self) but could be
189 /// optimized further with dedicated squaring algorithms.
190 #[inline(always)]
191 pub fn square(&self) -> Self {
192 self.mul(self)
193 }
194
195 /// Compute the modular multiplicative inverse using Fermat's Little Theorem
196 ///
197 /// For prime fields, a^(p-1) ≡ 1 (mod p), so a^(p-2) ≡ a^(-1) (mod p).
198 /// Uses binary exponentiation (square-and-multiply) for efficiency.
199 ///
200 /// Returns an error if attempting to invert zero (which has no inverse).
201 pub fn invert(&self) -> Result<Self> {
202 if self.is_zero() {
203 return Err(Error::param(
204 "FieldElement",
205 "Inversion of zero is undefined",
206 ));
207 }
208
209 // The exponent p-2 for NIST P-256 in big-endian byte format
210 const P_MINUS_2: [u8; 32] = [
211 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
212 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
213 0xFF, 0xFF, 0xFF, 0xFD,
214 ];
215
216 // Binary exponentiation: compute self^(p-2) mod p
217 let mut result = FieldElement::one();
218 let mut base = self.clone();
219
220 // Process each bit of the exponent from least to most significant
221 for &byte in P_MINUS_2.iter().rev() {
222 for bit in 0..8 {
223 if (byte >> bit) & 1 == 1 {
224 result = result.mul(&base);
225 }
226 base = base.square();
227 }
228 }
229
230 Ok(result)
231 }
232
233 /// Check if the field element represents zero
234 ///
235 /// Constant-time check across all limbs to determine if the
236 /// field element is the additive identity.
237 pub fn is_zero(&self) -> bool {
238 for limb in self.0.iter() {
239 if *limb != 0 {
240 return false;
241 }
242 }
243 true
244 }
245
246 /// Return `true` if the field element is odd (least-significant bit set)
247 ///
248 /// Used for point compression to determine the sign of the y-coordinate.
249 /// The parity is determined by the least significant bit of the canonical
250 /// representation.
251 pub fn is_odd(&self) -> bool {
252 (self.0[0] & 1) == 1
253 }
254
255 /// Compute modular square root using exponentiation.
256 ///
257 /// Because the P-256 prime satisfies p ≡ 3 (mod 4), we can compute
258 /// sqrt(a) = a^((p+1)/4) mod p. This is more efficient than the
259 /// general Tonelli-Shanks algorithm.
260 ///
261 /// Returns `None` when the input is a quadratic non-residue (i.e.,
262 /// when no square root exists in the field).
263 ///
264 /// # Algorithm
265 /// For p ≡ 3 (mod 4), if a has a square root, then:
266 /// - sqrt(a) = ±a^((p+1)/4) mod p
267 /// - We return the principal square root (the smaller of the two)
268 pub fn sqrt(&self) -> Option<Self> {
269 if self.is_zero() {
270 return Some(Self::zero());
271 }
272
273 // (p + 1) / 4 for P-256 as big-endian bytes
274 // p = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff
275 // (p + 1) / 4 = 0x3fffffffc0000000400000000000000000000000400000000000000000000000
276 const EXP: [u8; 32] = [
277 0x3F, 0xFF, 0xFF, 0xFF, 0xC0, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00,
278 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
279 0x00, 0x00, 0x00, 0x00,
280 ];
281
282 let mut result = FieldElement::one();
283 let mut base = self.clone();
284
285 // Binary exponentiation from LSB to MSB
286 for &byte in EXP.iter().rev() {
287 for bit in 0..8 {
288 if ((byte >> bit) & 1) == 1 {
289 result = result.mul(&base);
290 }
291 base = base.square();
292 }
293 }
294
295 // Verify that result^2 = self (constant-time check)
296 if result.square() == *self {
297 Some(result)
298 } else {
299 None
300 }
301 }
302
303 // Private helper methods
304
305 /// Constant-time conditional selection between two limb arrays
306 ///
307 /// Returns a if flag == 0, returns b if flag == 1
308 /// Used for branchless operations to maintain constant-time guarantees.
309 fn conditional_select(a: &[u32; 8], b: &[u32; 8], flag: Choice) -> Self {
310 let mut out = [0u32; 8];
311 for i in 0..8 {
312 out[i] = u32::conditional_select(&a[i], &b[i], flag);
313 }
314 FieldElement(out)
315 }
316
317 /// 8-limb addition with carry propagation
318 ///
319 /// Performs full-width addition across all limbs, returning both
320 /// the sum and the final carry bit for overflow detection.
321 #[inline(always)]
322 fn adc8(a: [u32; 8], b: [u32; 8]) -> ([u32; 8], u32) {
323 let mut r = [0u32; 8];
324 let mut carry = 0;
325
326 #[allow(clippy::needless_range_loop)] // Index used for multiple arrays
327 for i in 0..8 {
328 // Add corresponding limbs plus carry from previous iteration
329 let (sum1, carry1) = a[i].overflowing_add(b[i]);
330 let (sum2, carry2) = sum1.overflowing_add(carry);
331
332 r[i] = sum2;
333 carry = (carry1 as u32) | (carry2 as u32);
334 }
335
336 (r, carry)
337 }
338
339 /// 8-limb subtraction with borrow propagation
340 ///
341 /// Performs full-width subtraction across all limbs, returning both
342 /// the difference and the final borrow bit for underflow detection.
343 #[inline(always)]
344 fn sbb8(a: [u32; 8], b: [u32; 8]) -> ([u32; 8], u32) {
345 let mut r = [0u32; 8];
346 let mut borrow = 0;
347
348 #[allow(clippy::needless_range_loop)] // Index used for multiple arrays
349 for i in 0..8 {
350 // Subtract corresponding limbs minus borrow from previous iteration
351 let (diff1, borrow1) = a[i].overflowing_sub(b[i]);
352 let (diff2, borrow2) = diff1.overflowing_sub(borrow);
353
354 r[i] = diff2;
355 borrow = (borrow1 as u32) | (borrow2 as u32);
356 }
357 (r, borrow)
358 }
359
360 /// Conditionally subtract p if the current value is >= p
361 ///
362 /// Ensures the field element is in canonical reduced form.
363 /// Used as a final step in arithmetic operations.
364 fn conditional_sub_p(&self) -> Self {
365 let needs_sub = Choice::from((!self.is_valid() as u8) & 1);
366 Self::conditional_sub(self.0, needs_sub)
367 }
368
369 /// Conditionally subtract the field modulus p based on a boolean condition
370 ///
371 /// Uses constant-time selection to avoid branching while maintaining
372 /// the option to perform the subtraction.
373 fn conditional_sub(limbs: [u32; 8], condition: Choice) -> Self {
374 let mut result = [0u32; 8];
375 let (diff, _) = Self::sbb8(limbs, Self::MOD_LIMBS);
376
377 // Constant-time select between original limbs and difference
378 for i in 0..8 {
379 result[i] = u32::conditional_select(&limbs[i], &diff[i], condition);
380 }
381
382 Self(result)
383 }
384
385 /// NIST P-256 specific reduction for 512-bit values using Solinas method
386 /// Fully constant-time Solinas reduction with two carry-folds.
387 pub(crate) fn reduce_wide(t: [u32; 16]) -> FieldElement {
388 // 1) load into signed 128-bit
389 let mut s = [0i128; 16];
390 for (i, &val) in t.iter().enumerate() {
391 s[i] = val as i128;
392 }
393
394 // 2) fold high limbs 8..15 into 0..7 via
395 // 2^256 ≡ 2^224 − 2^192 − 2^96 + 1
396 for i in (8..16).rev() {
397 let v = s[i];
398 s[i] = 0;
399 s[i - 8] = s[i - 8].wrapping_add(v); // +2^0
400 s[i - 5] = s[i - 5].wrapping_sub(v); // -2^96
401 s[i - 2] = s[i - 2].wrapping_sub(v); // -2^192
402 s[i - 1] = s[i - 1].wrapping_add(v); // +2^224
403 }
404
405 // 3) first signed carry-propagate
406 let mut carry1: i128 = 0;
407 for val in s.iter_mut().take(8) {
408 let tmp = *val + carry1;
409 *val = tmp & 0xffff_ffff;
410 carry1 = tmp >> 32; // arithmetic shift
411 }
412
413 // 4) fold carry1 back down (correct indices: 3 & 6)
414 let c1 = carry1;
415 s[0] = s[0].wrapping_add(c1); // +2^0
416 s[3] = s[3].wrapping_sub(c1); // -2^96
417 s[6] = s[6].wrapping_sub(c1); // -2^192
418 s[7] = s[7].wrapping_add(c1); // +2^224
419
420 // 5) second signed carry-propagate
421 let mut carry2: i128 = 0;
422 for val in s.iter_mut().take(8) {
423 let tmp = *val + carry2;
424 *val = tmp & 0xffff_ffff;
425 carry2 = tmp >> 32;
426 }
427
428 // 6) fold carry2 back down (correct indices: 3 & 6)
429 let c2 = carry2;
430 s[0] = s[0].wrapping_add(c2);
431 s[3] = s[3].wrapping_sub(c2);
432 s[6] = s[6].wrapping_sub(c2);
433 s[7] = s[7].wrapping_add(c2);
434
435 // 7) final signed carry-propagate into 32-bit limbs
436 let mut out = [0u32; 8];
437 let mut carry3: i128 = 0;
438 for (i, val) in s.iter().take(8).enumerate() {
439 let tmp = *val + carry3;
440 out[i] = (tmp & 0xffff_ffff) as u32;
441 carry3 = tmp >> 32;
442 }
443
444 // 8) one last constant-time subtract if ≥ p
445 let (subbed, borrow) = Self::sbb8(out, Self::MOD_LIMBS);
446 let need_sub = Choice::from((borrow ^ 1) as u8); // borrow==0 ⇒ out>=p
447 Self::conditional_select(&out, &subbed, need_sub)
448 }
449
450 /// Get the field modulus p as a FieldElement
451 ///
452 /// Returns the NIST P-256 prime modulus for use in reduction operations.
453 pub(crate) fn get_modulus() -> Self {
454 FieldElement(Self::MOD_LIMBS)
455 }
456}