// blvm_secp256k1/field/layout_5x52.rs
//! 5x52 field element layout for x86_64 and aarch64.
//!
//! Pure Rust implementation using u128 for wide multiplication.
//! Field modulus: p = 2^256 - 2^32 - 977 (SEC2 secp256k1)
5
use subtle::{Choice, ConditionallySelectable, ConstantTimeEq};

use crate::modinv64::{self, Signed62, SECP256K1_FE_MODINV_MODINFO};
8
/// Field element in 5x52 limb representation.
/// Each limb holds 52 bits (except limb 4 which holds 48).
/// Layout matches libsecp256k1 field_5x52.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default)]
pub struct FieldElement {
    /// Limbs, little-endian by significance: the represented value is
    /// sum(n[i] * 2^(52*i)). Limbs may temporarily exceed their nominal
    /// widths (magnitude > 1) until a normalize pass.
    pub n: [u64; 5],
}
17
const M: u64 = 0xFFFFFFFFFFFFF; // 52-bit mask for limbs 0..=3
const M4: u64 = 0x0FFFFFFFFFFFF; // 48-bit mask for limb 4
const R: u64 = 0x1000003D10; // 2^260 mod p = (2^32 + 977) << 4; used by mul/sqr folding
const R_LOW: u64 = 0x1000003D1; // 2^256 mod p = 2^32 + 977; p = 2^256 - 2^32 - 977
const P0: u64 = 0xFFFFEFFFFFC2F; // lowest (least-significant) limb of p, for comparisons
23
24impl FieldElement {
25    pub fn zero() -> Self {
26        Self { n: [0, 0, 0, 0, 0] }
27    }
28
29    pub fn one() -> Self {
30        Self { n: [1, 0, 0, 0, 0] }
31    }
32
33    /// Set to a small integer a in [0, 0x7FFF].
34    pub fn set_int(&mut self, a: u32) {
35        debug_assert!(a <= 0x7FFF);
36        self.n[0] = a as u64;
37        self.n[1] = 0;
38        self.n[2] = 0;
39        self.n[3] = 0;
40        self.n[4] = 0;
41    }
42
43    /// True if the normalized value is odd. Caller must ensure magnitude <= 1.
44    pub fn is_odd(&self) -> bool {
45        self.n[0] & 1 == 1
46    }
47
48    /// Set from 32-byte big-endian encoding (mod p).
49    /// Returns false if the value is >= p.
50    pub fn set_b32_limit(&mut self, bytes: &[u8; 32]) -> bool {
51        self.set_b32_mod(bytes);
52        // Check if value >= p
53        !((self.n[4] == M4) && (self.n[3] & self.n[2] & self.n[1]) == M && (self.n[0] >= P0))
54    }
55
56    /// Set from 32-byte big-endian encoding, reducing mod p.
57    pub fn set_b32_mod(&mut self, a: &[u8; 32]) {
58        self.n[0] = u64::from(a[31])
59            | (u64::from(a[30]) << 8)
60            | (u64::from(a[29]) << 16)
61            | (u64::from(a[28]) << 24)
62            | (u64::from(a[27]) << 32)
63            | (u64::from(a[26]) << 40)
64            | ((u64::from(a[25]) & 0xF) << 48);
65        self.n[1] = ((a[25] >> 4) & 0xF) as u64
66            | (u64::from(a[24]) << 4)
67            | (u64::from(a[23]) << 12)
68            | (u64::from(a[22]) << 20)
69            | (u64::from(a[21]) << 28)
70            | (u64::from(a[20]) << 36)
71            | (u64::from(a[19]) << 44);
72        self.n[2] = u64::from(a[18])
73            | (u64::from(a[17]) << 8)
74            | (u64::from(a[16]) << 16)
75            | (u64::from(a[15]) << 24)
76            | (u64::from(a[14]) << 32)
77            | (u64::from(a[13]) << 40)
78            | ((u64::from(a[12]) & 0xF) << 48);
79        self.n[3] = ((a[12] >> 4) & 0xF) as u64
80            | (u64::from(a[11]) << 4)
81            | (u64::from(a[10]) << 12)
82            | (u64::from(a[9]) << 20)
83            | (u64::from(a[8]) << 28)
84            | (u64::from(a[7]) << 36)
85            | (u64::from(a[6]) << 44);
86        self.n[4] = u64::from(a[5])
87            | (u64::from(a[4]) << 8)
88            | (u64::from(a[3]) << 16)
89            | (u64::from(a[2]) << 24)
90            | (u64::from(a[1]) << 32)
91            | (u64::from(a[0]) << 40);
92    }
93
    /// Write normalized value to 32-byte big-endian.
    /// The element must be normalized first: limbs wider than their nominal
    /// width would be silently truncated by the masks below.
    pub fn get_b32(&self, r: &mut [u8; 32]) {
        let a = &self.n;
        // r[0] is the most significant byte. Limb boundaries fall mid-byte
        // at r[25] and r[12], which each combine 4 bits of two limbs.
        r[0] = (a[4] >> 40) as u8;
        r[1] = (a[4] >> 32) as u8;
        r[2] = (a[4] >> 24) as u8;
        r[3] = (a[4] >> 16) as u8;
        r[4] = (a[4] >> 8) as u8;
        r[5] = a[4] as u8;
        r[6] = (a[3] >> 44) as u8;
        r[7] = (a[3] >> 36) as u8;
        r[8] = (a[3] >> 28) as u8;
        r[9] = (a[3] >> 20) as u8;
        r[10] = (a[3] >> 12) as u8;
        r[11] = (a[3] >> 4) as u8;
        // Top 4 bits of limb 2 in the low nibble, low 4 bits of limb 3 above.
        r[12] = (((a[2] >> 48) & 0xF) | ((a[3] & 0xF) << 4)) as u8;
        r[13] = (a[2] >> 40) as u8;
        r[14] = (a[2] >> 32) as u8;
        r[15] = (a[2] >> 24) as u8;
        r[16] = (a[2] >> 16) as u8;
        r[17] = (a[2] >> 8) as u8;
        r[18] = a[2] as u8;
        r[19] = (a[1] >> 44) as u8;
        r[20] = (a[1] >> 36) as u8;
        r[21] = (a[1] >> 28) as u8;
        r[22] = (a[1] >> 20) as u8;
        r[23] = (a[1] >> 12) as u8;
        r[24] = (a[1] >> 4) as u8;
        // Top 4 bits of limb 0 in the low nibble, low 4 bits of limb 1 above.
        r[25] = (((a[0] >> 48) & 0xF) | ((a[1] & 0xF) << 4)) as u8;
        r[26] = (a[0] >> 40) as u8;
        r[27] = (a[0] >> 32) as u8;
        r[28] = (a[0] >> 24) as u8;
        r[29] = (a[0] >> 16) as u8;
        r[30] = (a[0] >> 8) as u8;
        r[31] = a[0] as u8;
    }
130
131    /// Normalize to canonical form (constant-time).
132    #[inline(always)]
133    pub fn normalize(&mut self) {
134        let mut t0 = self.n[0];
135        let mut t1 = self.n[1];
136        let mut t2 = self.n[2];
137        let mut t3 = self.n[3];
138        let mut t4 = self.n[4];
139
140        let mut x = t4 >> 48;
141        t4 &= M4;
142
143        t0 += x * R_LOW;
144        t1 += t0 >> 52;
145        t0 &= M;
146        let mut m = t1;
147        t2 += t1 >> 52;
148        t1 &= M;
149        m &= t2;
150        t3 += t2 >> 52;
151        t2 &= M;
152        m &= t3;
153        t4 += t3 >> 52;
154        t3 &= M;
155        m &= t4;
156
157        // For value p we have t4=M4, m=M4 (not M). Check (m == M) | (m == M4) for p.
158        x = (t4 >> 48) | (((t4 == M4) as u64) & ((m == M || m == M4) as u64) & ((t0 >= P0) as u64));
159
160        t0 += x * R_LOW;
161        t1 += t0 >> 52;
162        t0 &= M;
163        t2 += t1 >> 52;
164        t1 &= M;
165        t3 += t2 >> 52;
166        t2 &= M;
167        t4 += t3 >> 52;
168        t3 &= M;
169        t4 &= M4;
170
171        self.n[0] = t0;
172        self.n[1] = t1;
173        self.n[2] = t2;
174        self.n[3] = t3;
175        self.n[4] = t4;
176    }
177
178    pub fn is_zero(&self) -> bool {
179        (self.n[0] | self.n[1] | self.n[2] | self.n[3] | self.n[4]) == 0
180    }
181
182    /// Add a small integer to r.n[0]. `a` must be in [0, 0x7FFF].
183    pub fn add_int(&mut self, a: u32) {
184        debug_assert!(a <= 0x7FFF);
185        self.n[0] += a as u64;
186    }
187
188    /// Multiply all limbs by a small integer.
189    #[inline(always)]
190    pub fn mul_int(&mut self, a: u32) {
191        let a = a as u64;
192        self.n[0] *= a;
193        self.n[1] *= a;
194        self.n[2] *= a;
195        self.n[3] *= a;
196        self.n[4] *= a;
197    }
198
199    /// Half: r = r/2 mod p. If r is odd, add (p+1)/2 first.
200    pub fn half(&mut self) {
201        let mut t0 = self.n[0];
202        let mut t1 = self.n[1];
203        let mut t2 = self.n[2];
204        let mut t3 = self.n[3];
205        let mut t4 = self.n[4];
206        let one = 1u64;
207        let mask = if t0 & one == 1 { u64::MAX >> 12 } else { 0 };
208        t0 += P0 & mask;
209        t1 += mask;
210        t2 += mask;
211        t3 += mask;
212        t4 += mask >> 4;
213        self.n[0] = (t0 >> 1) + ((t1 & one) << 51);
214        self.n[1] = (t1 >> 1) + ((t2 & one) << 51);
215        self.n[2] = (t2 >> 1) + ((t3 & one) << 51);
216        self.n[3] = (t3 >> 1) + ((t4 & one) << 51);
217        self.n[4] = t4 >> 1;
218    }
219
    /// Normalize weakly: propagate carries but skip the final reduction check.
    /// Brings the element to magnitude 1 (each limb near its nominal width);
    /// the value may still be >= p, so only normalize() yields the canonical
    /// representative.
    pub fn normalize_weak(&mut self) {
        let mut t0 = self.n[0];
        let mut t1 = self.n[1];
        let mut t2 = self.n[2];
        let mut t3 = self.n[3];
        let mut t4 = self.n[4];
        // Fold overflow above bit 256: 2^256 ≡ 2^32 + 977 (mod p).
        let x = t4 >> 48;
        t4 &= M4;
        t0 += x * R_LOW;
        // Carry chain: each limb passes its bits above 52 upward.
        t1 += t0 >> 52;
        t0 &= M;
        t2 += t1 >> 52;
        t1 &= M;
        t3 += t2 >> 52;
        t2 &= M;
        t4 += t3 >> 52;
        t3 &= M;
        self.n[0] = t0;
        self.n[1] = t1;
        self.n[2] = t2;
        self.n[3] = t3;
        self.n[4] = t4;
    }
245
    /// Returns true if this value normalizes to 0 (either raw 0 or p).
    /// Processes all limbs unconditionally (cf. normalizes_to_zero_var,
    /// which exits early).
    pub fn normalizes_to_zero(&self) -> bool {
        let mut t0 = self.n[0];
        let mut t1 = self.n[1];
        let mut t2 = self.n[2];
        let mut t3 = self.n[3];
        let mut t4 = self.n[4];
        // Fold overflow above bit 256: 2^256 ≡ 2^32 + 977 (mod p).
        let x = t4 >> 48;
        t4 &= M4;
        t0 += x * R_LOW;
        t1 += t0 >> 52;
        t0 &= M;
        // z0 ORs every masked limb: z0 == 0 iff the folded value is exactly 0.
        // z1 ANDs each limb XORed against the matching limb of p:
        // 0x1000003D0 == M ^ P0, so t0 ^ 0x1000003D0 == M iff t0 == P0;
        // thus z1 == M iff the folded value is exactly p.
        let mut z0 = t0;
        let mut z1 = t0 ^ 0x1000003D0;
        t2 += t1 >> 52;
        t1 &= M;
        z0 |= t1;
        z1 &= t1;
        t3 += t2 >> 52;
        t2 &= M;
        z0 |= t2;
        z1 &= t2;
        t4 += t3 >> 52;
        t3 &= M;
        z0 |= t3;
        z1 &= t3;
        z0 |= t4;
        // M4 ^ 0xF000000000000 == M, so this factor is M iff t4 == M4.
        z1 &= t4 ^ 0xF000000000000;
        (z0 == 0) || (z1 == M)
    }
276
    /// Variable-time version of normalizes_to_zero (early exit).
    pub fn normalizes_to_zero_var(&self) -> bool {
        let t0 = self.n[0];
        let t4 = self.n[4];
        // Fold overflow above bit 256: 2^256 ≡ 2^32 + 977 (mod p).
        let x = t4 >> 48;
        let t0_new = t0 + x * R_LOW;
        // z0/z1 track "is 0" / "is p" exactly as in normalizes_to_zero.
        // If limb 0 already rules out both, exit without touching the rest.
        let mut z0 = t0_new & M;
        let mut z1 = z0 ^ 0x1000003D0;
        if z0 != 0 && z1 != M {
            return false;
        }
        let mut t1 = self.n[1];
        let mut t2 = self.n[2];
        let mut t3 = self.n[3];
        let mut t4 = t4 & M4;
        t1 += t0_new >> 52;
        t2 += t1 >> 52;
        t1 &= M;
        z0 |= t1;
        z1 &= t1;
        t3 += t2 >> 52;
        t2 &= M;
        z0 |= t2;
        z1 &= t2;
        t4 += t3 >> 52;
        t3 &= M;
        z0 |= t3;
        z1 &= t3;
        z0 |= t4;
        // M4 ^ 0xF000000000000 == M, so this factor is M iff t4 == M4.
        z1 &= t4 ^ 0xF000000000000;
        (z0 == 0) || (z1 == M)
    }
309
310    /// Returns true if a is a quadratic residue (square) mod p.
311    /// Variable-time. Uses sqrt internally.
312    pub fn is_square_var(a: &FieldElement) -> bool {
313        let mut t = FieldElement::zero();
314        t.sqrt(a)
315    }
316
    /// Square root: r = sqrt(a) mod p if it exists.
    /// Returns true if a square root exists. Uses a^((p+1)/4) since p ≡ 3 (mod 4).
    #[inline]
    pub fn sqrt(&mut self, a: &FieldElement) -> bool {
        // Addition chain for (p+1)/4 with blocks {2, 22, 223}
        // (x<k> below denotes a^(2^k - 1)).
        let mut x2 = FieldElement::zero();
        let mut x3 = FieldElement::zero();
        let mut x6;
        let mut x9;
        let mut x11;
        let mut x22;
        let mut x44;
        let mut x88;
        let mut x176;
        let mut x220;
        let mut x223;
        let mut t1;

        // x2 = a^3 = a^(2^2 - 1)
        x2.sqr(a);
        let mut t = FieldElement::zero();
        t.mul(&x2, a);
        x2 = t;
        // x3 = a^7 = a^(2^3 - 1)
        x3.sqr(&x2);
        t.mul(&x3, a);
        x3 = t;
        // Same addition chain as libsecp256k1; use in-place sqr to match their C.
        // x6 = a^(2^6 - 1)
        x6 = x3;
        for _ in 0..3 {
            x6.sqr_assign();
        }
        t.mul(&x6, &x3);
        x6 = t;
        // x9 = a^(2^9 - 1)
        x9 = x6;
        for _ in 0..3 {
            x9.sqr_assign();
        }
        t.mul(&x9, &x3);
        x9 = t;
        // x11 = a^(2^11 - 1)
        x11 = x9;
        for _ in 0..2 {
            x11.sqr_assign();
        }
        t.mul(&x11, &x2);
        x11 = t;
        // x22 = a^(2^22 - 1)
        x22 = x11;
        for _ in 0..11 {
            x22.sqr_assign();
        }
        t.mul(&x22, &x11);
        x22 = t;
        // x44 = a^(2^44 - 1)
        x44 = x22;
        for _ in 0..22 {
            x44.sqr_assign();
        }
        t.mul(&x44, &x22);
        x44 = t;
        // x88 = a^(2^88 - 1)
        x88 = x44;
        for _ in 0..44 {
            x88.sqr_assign();
        }
        t.mul(&x88, &x44);
        x88 = t;
        // x176 = a^(2^176 - 1)
        x176 = x88;
        for _ in 0..88 {
            x176.sqr_assign();
        }
        t.mul(&x176, &x88);
        x176 = t;
        // x220 = a^(2^220 - 1)
        x220 = x176;
        for _ in 0..44 {
            x220.sqr_assign();
        }
        t.mul(&x220, &x44);
        x220 = t;
        // x223 = a^(2^223 - 1)
        x223 = x220;
        for _ in 0..3 {
            x223.sqr_assign();
        }
        t.mul(&x223, &x3);
        t1 = t;
        // Final assembly: shift in the remaining {22, 2} blocks, then two
        // trailing squarings for the low zero bits of (p+1)/4.
        for _ in 0..23 {
            t1.sqr_assign();
        }
        t.mul(&t1, &x22);
        t1 = t;
        for _ in 0..6 {
            t1.sqr_assign();
        }
        t.mul(&t1, &x2);
        t1 = t;
        t1.sqr_assign();
        self.sqr(&t1);

        // Only half the nonzero elements are squares: verify r^2 == a.
        let mut check = FieldElement::zero();
        check.sqr(self);
        FieldElement::fe_equal(&check, a)
    }
414
415    /// Field equality (works with unnormalized): a == b iff (-a + b) normalizes to zero.
416    pub fn fe_equal(a: &FieldElement, b: &FieldElement) -> bool {
417        let mut na = *a;
418        let a_copy = *a;
419        na.negate(&a_copy, 1);
420        na.add_assign(b);
421        na.normalizes_to_zero_var()
422    }
423
424    /// Add another field element (in place).
425    #[inline(always)]
426    pub fn add_assign(&mut self, a: &FieldElement) {
427        self.n[0] += a.n[0];
428        self.n[1] += a.n[1];
429        self.n[2] += a.n[2];
430        self.n[3] += a.n[3];
431        self.n[4] += a.n[4];
432    }
433
434    /// Add: r = a + b.
435    #[inline(always)]
436    pub fn add(&mut self, a: &FieldElement, b: &FieldElement) {
437        *self = *a;
438        self.add_assign(b);
439    }
440
441    /// Compare normalized field elements. Returns -1 if a < b, 0 if a == b, 1 if a > b.
442    pub fn cmp_var(a: &FieldElement, b: &FieldElement) -> i32 {
443        let mut na = *a;
444        let mut nb = *b;
445        na.normalize();
446        nb.normalize();
447        for i in (0..5).rev() {
448            if na.n[i] < nb.n[i] {
449                return -1;
450            }
451            if na.n[i] > nb.n[i] {
452                return 1;
453            }
454        }
455        0
456    }
457
458    /// Negate (r = -a mod p). Assumes magnitude <= 32.
459    #[inline(always)]
460    pub fn negate(&mut self, a: &FieldElement, m: u32) {
461        let m = m as u64;
462        self.n[0] = P0 * 2 * (m + 1) - a.n[0];
463        self.n[1] = M * 2 * (m + 1) - a.n[1];
464        self.n[2] = M * 2 * (m + 1) - a.n[2];
465        self.n[3] = M * 2 * (m + 1) - a.n[3];
466        self.n[4] = M4 * 2 * (m + 1) - a.n[4];
467    }
468
    /// Multiply: r = a * b (mod p)
    /// Result has magnitude 1 but is not normalized. Inputs may be
    /// unnormalized (libsecp256k1 permits magnitude <= 8 here — TODO confirm
    /// callers respect that bound).
    #[inline(always)]
    pub fn mul(&mut self, a: &FieldElement, b: &FieldElement) {
        fe_mul_inner(&mut self.n, &a.n, &b.n);
    }
474
    /// Square: r = a^2 (mod p)
    /// Result has magnitude 1 but is not normalized. Same input-magnitude
    /// expectations as mul().
    #[inline(always)]
    pub fn sqr(&mut self, a: &FieldElement) {
        fe_sqr_inner(&mut self.n, &a.n);
    }
480
    /// Square in place: self = self^2. Matches libsecp256k1's in-place sqr in sqrt loops.
    #[inline(always)]
    pub fn sqr_assign(&mut self) {
        // Copy first: sqr() takes output and input separately.
        let a = *self;
        self.sqr(&a);
    }
487
488    /// Modular inverse via modinv64 (Bernstein-Yang safegcd). Constant-time.
489    #[inline(always)]
490    pub fn inv(&mut self, a: &FieldElement) {
491        let mut tmp = *a;
492        tmp.normalize();
493        let mut s = fe_to_signed62(&tmp);
494        modinv64::modinv64(&mut s, &SECP256K1_FE_MODINV_MODINFO);
495        fe_from_signed62(self, &s);
496    }
497
498    /// Constant-time conditional move: if flag, set self = a.
499    pub fn cmov(&mut self, a: &FieldElement, flag: Choice) {
500        for i in 0..5 {
501            self.n[i] = u64::conditional_select(&self.n[i], &a.n[i], flag);
502        }
503    }
504
    /// Convert to compact storage (4x64). Input must be normalized:
    /// the packing assumes each limb fits its nominal bit-width.
    pub fn to_storage(&self, r: &mut FeStorage) {
        let a = &self.n;
        // Concatenate the 52-bit limbs into four contiguous 64-bit words.
        r.n[0] = a[0] | a[1] << 52;
        r.n[1] = a[1] >> 12 | a[2] << 40;
        r.n[2] = a[2] >> 24 | a[3] << 28;
        r.n[3] = a[3] >> 36 | a[4] << 16;
    }
513
    /// Convert from compact storage. Output limbs are already in canonical form
    /// (each fits its bit-width), matching libsecp256k1 which marks this as normalized=1, magnitude=1.
    pub fn from_storage(&mut self, a: &FeStorage) {
        // Split the four 64-bit words back into 52-bit limbs (inverse of to_storage).
        self.n[0] = a.n[0] & M;
        self.n[1] = a.n[0] >> 52 | ((a.n[1] << 12) & M);
        self.n[2] = a.n[1] >> 40 | ((a.n[2] << 24) & M);
        self.n[3] = a.n[2] >> 28 | ((a.n[3] << 36) & M);
        self.n[4] = a.n[3] >> 16;
    }
523}
524
/// Compact field storage (4x64). Matches libsecp256k1 fe_storage.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct FeStorage {
    /// Four 64-bit words, least significant first, holding a normalized value.
    pub n: [u64; 4],
}
531
532impl FeStorage {
533    pub fn cmov(&mut self, a: &FeStorage, flag: Choice) {
534        for i in 0..4 {
535            self.n[i] = u64::conditional_select(&self.n[i], &a.n[i], flag);
536        }
537    }
538}
539
540impl ConstantTimeEq for FieldElement {
541    fn ct_eq(&self, other: &Self) -> Choice {
542        self.n[0].ct_eq(&other.n[0])
543            & self.n[1].ct_eq(&other.n[1])
544            & self.n[2].ct_eq(&other.n[2])
545            & self.n[3].ct_eq(&other.n[3])
546            & self.n[4].ct_eq(&other.n[4])
547    }
548}
549
550impl PartialEq for FieldElement {
551    fn eq(&self, other: &Self) -> bool {
552        self.ct_eq(other).into()
553    }
554}
555
// Limb-wise equality is a total equivalence relation on the raw representation.
impl Eq for FieldElement {}
557
const M62: u64 = u64::MAX >> 2; // 62-bit limb mask for the signed62 representation
559
/// Convert 5x52 field element to signed62 (for modinv64).
/// Repacks 260 bits (5x52) into 5x62-bit limbs. The input must be
/// normalized so each limb fits its nominal width and all signed62
/// limbs come out non-negative.
#[inline(always)]
fn fe_to_signed62(a: &FieldElement) -> Signed62 {
    let a0 = a.n[0];
    let a1 = a.n[1];
    let a2 = a.n[2];
    let a3 = a.n[3];
    let a4 = a.n[4];
    Signed62 {
        v: [
            ((a0 | a1 << 52) & M62) as i64,
            ((a1 >> 10 | a2 << 42) & M62) as i64,
            ((a2 >> 20 | a3 << 32) & M62) as i64,
            ((a3 >> 30 | a4 << 22) & M62) as i64,
            (a4 >> 40) as i64,
        ],
    }
}
578
/// Convert signed62 to 5x52 field element.
/// Assumes the signed62 value is fully reduced and non-negative
/// (as produced by modinv64), so the as-u64 casts are lossless.
#[inline(always)]
fn fe_from_signed62(r: &mut FieldElement, a: &Signed62) {
    let a0 = a.v[0] as u64;
    let a1 = a.v[1] as u64;
    let a2 = a.v[2] as u64;
    let a3 = a.v[3] as u64;
    let a4 = a.v[4] as u64;
    // Repack 5x62-bit limbs into 5x52-bit limbs (inverse of fe_to_signed62).
    r.n[0] = a0 & M;
    r.n[1] = (a0 >> 52 | a1 << 10) & M;
    r.n[2] = (a1 >> 42 | a2 << 20) & M;
    r.n[3] = (a2 >> 32 | a3 << 30) & M;
    r.n[4] = a3 >> 22 | a4 << 40;
}
593
/// 5x52 field multiplication using u128 (from field_5x52_int128_impl.h)
///
/// r = a * b mod p. The ten partial-product columns are accumulated in two
/// u128 accumulators: `c` walks the low columns and `d` the high ones.
/// Anything at or above 2^260 is folded back using 2^260 ≡ R (mod p),
/// where R = 0x1000003D10 = (2^32 + 977) << 4.
/// Statement order is carry-critical; it mirrors libsecp256k1 exactly.
#[inline(always)]
fn fe_mul_inner(r: &mut [u64; 5], a: &[u64; 5], b: &[u64; 5]) {
    let a0 = a[0];
    let a1 = a[1];
    let a2 = a[2];
    let a3 = a[3];
    let a4 = a[4];

    // Column 3 (coefficient of 2^156): a0*b3 + a1*b2 + a2*b1 + a3*b0.
    let mut d: u128 = (a0 as u128) * (b[3] as u128);
    d += (a1 as u128) * (b[2] as u128);
    d += (a2 as u128) * (b[1] as u128);
    d += (a3 as u128) * (b[0] as u128);

    // Column 8 (a4*b4, coefficient 2^416) folds into column 3:
    // 2^416 = 2^260 * 2^156 and 2^260 ≡ R (mod p). Low 64 bits fold now.
    let mut c: u128 = (a4 as u128) * (b[4] as u128);
    d += (R as u128) * ((c as u64) as u128);
    c >>= 64;

    let t3 = (d as u64) & M;
    d >>= 52;

    // Column 4, plus the high half of column 8 (R scaled by 2^(64-52) = 2^12).
    d += (a0 as u128) * (b[4] as u128);
    d += (a1 as u128) * (b[3] as u128);
    d += (a2 as u128) * (b[2] as u128);
    d += (a3 as u128) * (b[1] as u128);
    d += (a4 as u128) * (b[0] as u128);
    d += ((R as u128) << 12) * ((c as u64) as u128);

    let mut t4 = (d as u64) & M;
    d >>= 52;
    // Keep 48 bits in t4 (the final limb width); the top 4 bits (tx) sit at
    // bit 256 and are folded into column 0 via u0 below.
    let tx = t4 >> 48;
    t4 &= M >> 4;

    // Column 0, plus column 5 (a1*b4 + ... + a4*b1) to be folded.
    c = (a0 as u128) * (b[0] as u128);
    d += (a1 as u128) * (b[4] as u128);
    d += (a2 as u128) * (b[3] as u128);
    d += (a3 as u128) * (b[2] as u128);
    d += (a4 as u128) * (b[1] as u128);

    // u0 = low 52 bits of column 5, recombined with tx so a single fold by
    // R >> 4 handles both (u0 is pre-shifted up by 4 bits).
    let mut u0 = (d as u64) & M;
    d >>= 52;
    u0 = (u0 << 4) | tx;
    c += (u0 as u128) * ((R >> 4) as u128);

    r[0] = (c as u64) & M;
    c >>= 52;

    // Column 1, plus folded column 6.
    c += (a0 as u128) * (b[1] as u128);
    c += (a1 as u128) * (b[0] as u128);
    d += (a2 as u128) * (b[4] as u128);
    d += (a3 as u128) * (b[3] as u128);
    d += (a4 as u128) * (b[2] as u128);
    c += (R as u128) * (((d as u64) & M) as u128);
    d >>= 52;

    r[1] = (c as u64) & M;
    c >>= 52;

    // Column 2, plus folded column 7; d is consumed 64 bits at a time here.
    c += (a0 as u128) * (b[2] as u128);
    c += (a1 as u128) * (b[1] as u128);
    c += (a2 as u128) * (b[0] as u128);
    d += (a3 as u128) * (b[4] as u128);
    d += (a4 as u128) * (b[3] as u128);
    c += (R as u128) * ((d as u64) as u128);
    d >>= 64;

    r[2] = (c as u64) & M;
    c >>= 52;
    // Remaining high part of d, then the saved column-3 limb.
    c += ((R as u128) << 12) * ((d as u64) as u128);
    c += t3 as u128;

    r[3] = (c as u64) & M;
    c >>= 52;
    r[4] = (c as u64) + t4;
}
669
/// 5x52 field squaring using u128
///
/// Same column/fold scheme as fe_mul_inner, with symmetric partial products
/// doubled (a_i * a_j appears twice for i != j). Statement order is
/// carry-critical; it mirrors libsecp256k1 exactly.
#[inline(always)]
fn fe_sqr_inner(r: &mut [u64; 5], a: &[u64; 5]) {
    let a0 = a[0];
    let a1 = a[1];
    let a2 = a[2];
    let a3 = a[3];
    let mut a4 = a[4];

    // Column 3: 2*a0*a3 + 2*a1*a2.
    let mut d: u128 = (a0 as u128) * 2 * (a3 as u128);
    d += (a1 as u128) * 2 * (a2 as u128);

    // Column 8 (a4^2) folds in via 2^260 ≡ R (mod p); low 64 bits now.
    let mut c: u128 = (a4 as u128) * (a4 as u128);
    d += (R as u128) * ((c as u64) as u128);
    c >>= 64;

    let t3 = (d as u64) & M;
    d >>= 52;

    // Column 4; a4 is pre-doubled for all its remaining cross terms.
    a4 *= 2;
    d += (a0 as u128) * (a4 as u128);
    d += (a1 as u128) * 2 * (a3 as u128);
    d += (a2 as u128) * (a2 as u128);
    d += ((R as u128) << 12) * ((c as u64) as u128);

    let mut t4 = (d as u64) & M;
    d >>= 52;
    // Keep 48 bits in t4; the top 4 bits (tx) fold into column 0 via u0.
    let tx = t4 >> 48;
    t4 &= M >> 4;

    // Column 0, plus column 5 (2*a1*a4' + 2*a2*a3, using doubled a4).
    c = (a0 as u128) * (a0 as u128);
    d += (a1 as u128) * (a4 as u128);
    d += (a2 as u128) * 2 * (a3 as u128);

    // Recombine the low 52 bits of column 5 with tx; fold by R >> 4.
    let mut u0 = (d as u64) & M;
    d >>= 52;
    u0 = (u0 << 4) | tx;
    c += (u0 as u128) * ((R >> 4) as u128);

    r[0] = (c as u64) & M;
    c >>= 52;

    // Column 1, plus folded column 6; a0 doubled for its cross terms.
    let a0_2 = a0 * 2;
    c += (a0_2 as u128) * (a1 as u128);
    d += (a2 as u128) * (a4 as u128);
    d += (a3 as u128) * (a3 as u128);
    c += (R as u128) * (((d as u64) & M) as u128);
    d >>= 52;

    r[1] = (c as u64) & M;
    c >>= 52;

    // Column 2, plus folded column 7; d consumed 64 bits at a time.
    c += (a0_2 as u128) * (a2 as u128);
    c += (a1 as u128) * (a1 as u128);
    d += (a3 as u128) * (a4 as u128);
    c += (R as u128) * ((d as u64) as u128);
    d >>= 64;

    r[2] = (c as u64) & M;
    c >>= 52;
    // Remaining high part of d, then the saved column-3 limb.
    c += ((R as u128) << 12) * ((d as u64) as u128);
    c += t3 as u128;

    r[3] = (c as u64) & M;
    c >>= 52;
    r[4] = (c as u64) + t4;
}