Skip to main content

sqisign_verify/fp/
level1.rs

1//!
2//! The prime is `p = 5 * 2^248 - 1`. Field elements are 5-limb arrays
3//! `[u64; 5]` storing values in unsaturated radix `2^51`: each limb
4//! holds a roughly-51-bit quantity, leaving 13 carry bits at the top.
5//! Arithmetic operates in an internal Montgomery form; canonical
6//! little-endian byte encoding (via `fp_encode` / `fp_decode`)
7//! returns the standard integer representation in `[0, p)`.
8//!
9//! The free functions below implement the limb-level primitives;
10//! the [`FpBackend`] trait impl for [`Level1`] at the bottom of the
11//! file wires them into the generic [`super::Fp`] API.
12
13use super::FpBackend;
14use crate::params::Level1;
15use hybrid_array::Array;
16use subtle::Choice;
17
18/// Number of 64-bit limbs in an `Fp` element.
19pub const NLIMBS: usize = 5;
20
21/// Bit width of each unsaturated limb.
22pub const RADIX: u32 = 51;
23
24/// `2^RADIX - 1`: keeps the low 51 bits of a limb.
25pub const MASK: u64 = (1u64 << RADIX) - 1;
26
27/// The contribution of `p + 1 = 5 * 2^248` to limb 4 in radix-`2^51`:
28/// `5 * 2^248 / 2^(51*4) = 5 * 2^44`. Used as the Montgomery folding
29/// constant inside `modmul` and `modsqr`.
30pub const P4: u64 = 0x5000_0000_0000;
31
32/// `2 * P4`, used inside `modadd`, `modsub`, and `modneg` to
33/// keep partial sums in `[-p, p)` before the final reduction step.
34pub const TWO_P4: u64 = 0xa000_0000_0000;
35
36/// `0` as 5 radix-`2^51` limbs.
37pub const ZERO: [u64; NLIMBS] = [0; NLIMBS];
38
39/// Internal Montgomery form of `1 mod p`.
40pub const ONE: [u64; NLIMBS] = [
41    0x0000_0000_0000_0019,
42    0x0000_0000_0000_0000,
43    0x0000_0000_0000_0000,
44    0x0000_0000_0000_0000,
45    0x0000_3000_0000_0000,
46];
47
48/// Internal Montgomery form of 2⁻¹ mod p.
49pub const TWO_INV: [u64; NLIMBS] = [
50    0x0000_0000_0000_000c,
51    0x0000_0000_0000_0000,
52    0x0000_0000_0000_0000,
53    0x0000_0000_0000_0000,
54    0x0000_4000_0000_0000,
55];
56
57/// Internal Montgomery form of 3⁻¹ mod p.
58pub const THREE_INV: [u64; NLIMBS] = [
59    0x0005_5555_5555_555d,
60    0x0002_aaaa_aaaa_aaaa,
61    0x0005_5555_5555_5555,
62    0x0002_aaaa_aaaa_aaaa,
63    0x0000_4555_5555_5555,
64];
65
66/// Internal Montgomery form of `2^256 mod p`. Used by
67/// `fp_decode_reduce` when folding 32-byte input blocks.
68pub const R2: [u64; NLIMBS] = [
69    0x0001_9999_9999_9eb8,
70    0x0003_3333_3333_3333,
71    0x0006_6666_6666_6666,
72    0x0004_cccc_cccc_cccc,
73    0x0000_1999_9999_9999,
74];
75
76/// Conversion constant for `nres`: multiplying by `R2_NRES` and
77/// dividing by Montgomery `R` takes a canonical integer into internal
78/// Montgomery form (`R2_NRES = R^2 mod p` in the internal Montgomery
79/// scheme).
80pub const R2_NRES: [u64; NLIMBS] = [
81    0x0004_cccc_cccc_cf5c,
82    0x0001_9999_9999_9999,
83    0x0003_3333_3333_3333,
84    0x0006_6666_6666_6666,
85    0x0000_0ccc_cccc_cccc,
86];
87
88/// Propagate carries between limbs, leaving each of limbs 0..3 in
89/// `[0, 2^51)` and limb 4 holding the accumulated high bits. Returns
90/// `0xFFFF_FFFF_FFFF_FFFF` if the value (interpreted as two's-complement
91/// via the top bit of limb 4) is negative, else `0`.
92#[inline]
93pub(crate) fn prop(n: &mut [u64; NLIMBS]) -> u64 {
94    let mask = MASK;
95    let mut carry: i64 = n[0] as i64;
96    carry >>= RADIX;
97    n[0] &= mask;
98    for limb in n.iter_mut().take(4).skip(1) {
99        carry = carry.wrapping_add(*limb as i64);
100        *limb = (carry as u64) & mask;
101        carry >>= RADIX;
102    }
103    n[4] = n[4].wrapping_add(carry as u64);
104    // Sign mask: 0 if non-negative, all-ones if negative.
105    let sign = n[4] >> 63;
106    0u64.wrapping_sub(sign)
107}
108
109/// [`prop`] followed by a conditional add of `p` if the propagation
110/// detected a negative intermediate, then a second [`prop`] pass.
111/// Returns `1` if the value was originally negative (and was just
112/// fixed up), `0` otherwise.
113#[inline]
114pub(crate) fn flatten(n: &mut [u64; NLIMBS]) -> u32 {
115    let carry = prop(n);
116    n[0] = n[0].wrapping_sub(1 & carry);
117    n[4] = n[4].wrapping_add(0x5000_0000_0000 & carry);
118    let _ = prop(n);
119    (carry & 1) as u32
120}
121
122/// Final subtract of `p`: tries `n -= p` and adds `p` back if the
123/// result went negative. Returns `1` if the original value was `< p`
124/// (and was preserved), `0` if it was `>= p` (and was reduced).
125#[inline]
126pub(crate) fn modfsb(n: &mut [u64; NLIMBS]) -> u32 {
127    n[0] = n[0].wrapping_add(1);
128    n[4] = n[4].wrapping_sub(0x5000_0000_0000);
129    flatten(n)
130}
131
132/// `n <- a + b mod 2p` (lazily reduced).
133#[inline]
134pub(crate) fn modadd(n: &mut [u64; NLIMBS], a: &[u64; NLIMBS], b: &[u64; NLIMBS]) {
135    n[0] = a[0].wrapping_add(b[0]);
136    n[1] = a[1].wrapping_add(b[1]);
137    n[2] = a[2].wrapping_add(b[2]);
138    n[3] = a[3].wrapping_add(b[3]);
139    n[4] = a[4].wrapping_add(b[4]);
140    n[0] = n[0].wrapping_add(2);
141    n[4] = n[4].wrapping_sub(TWO_P4);
142    let carry = prop(n);
143    n[0] = n[0].wrapping_sub(2 & carry);
144    n[4] = n[4].wrapping_add(TWO_P4 & carry);
145    let _ = prop(n);
146}
147
148/// `n <- a - b mod 2p` (lazily reduced).
149#[inline]
150pub(crate) fn modsub(n: &mut [u64; NLIMBS], a: &[u64; NLIMBS], b: &[u64; NLIMBS]) {
151    n[0] = a[0].wrapping_sub(b[0]);
152    n[1] = a[1].wrapping_sub(b[1]);
153    n[2] = a[2].wrapping_sub(b[2]);
154    n[3] = a[3].wrapping_sub(b[3]);
155    n[4] = a[4].wrapping_sub(b[4]);
156    let carry = prop(n);
157    n[0] = n[0].wrapping_sub(2 & carry);
158    n[4] = n[4].wrapping_add(TWO_P4 & carry);
159    let _ = prop(n);
160}
161
162/// `n <- -b mod 2p` (lazily reduced).
163#[inline]
164pub(crate) fn modneg(n: &mut [u64; NLIMBS], b: &[u64; NLIMBS]) {
165    n[0] = 0u64.wrapping_sub(b[0]);
166    n[1] = 0u64.wrapping_sub(b[1]);
167    n[2] = 0u64.wrapping_sub(b[2]);
168    n[3] = 0u64.wrapping_sub(b[3]);
169    n[4] = 0u64.wrapping_sub(b[4]);
170    let carry = prop(n);
171    n[0] = n[0].wrapping_sub(2 & carry);
172    n[4] = n[4].wrapping_add(TWO_P4 & carry);
173    let _ = prop(n);
174}
175
176/// `c <- a * b mod 2p`. Schoolbook 5x5 multiplication in radix `2^51`
177/// with interleaved Montgomery folding: each newly-emitted low limb
178/// `v_i` is added back at limb position `i + 4` as `v_i * P4`, which
179/// represents `v_i * 5 * 2^248 = v_i * (p + 1) = v_i (mod p)` and
180/// effectively divides the final result by Montgomery `R = 2^255`.
181#[inline]
182pub(crate) fn modmul(c: &mut [u64; NLIMBS], a: &[u64; NLIMBS], b: &[u64; NLIMBS]) {
183    let mask = MASK;
184    let p4: u128 = P4 as u128;
185
186    // t accumulates the partial sum at the current limb position. After
187    // emitting each limb (`t & mask`) we shift right by RADIX.
188    let mut t: u128;
189
190    t = (a[0] as u128) * (b[0] as u128);
191    let v0 = (t as u64) & mask;
192    t >>= RADIX;
193
194    t = t
195        .wrapping_add((a[0] as u128) * (b[1] as u128))
196        .wrapping_add((a[1] as u128) * (b[0] as u128));
197    let v1 = (t as u64) & mask;
198    t >>= RADIX;
199
200    t = t
201        .wrapping_add((a[0] as u128) * (b[2] as u128))
202        .wrapping_add((a[1] as u128) * (b[1] as u128))
203        .wrapping_add((a[2] as u128) * (b[0] as u128));
204    let v2 = (t as u64) & mask;
205    t >>= RADIX;
206
207    t = t
208        .wrapping_add((a[0] as u128) * (b[3] as u128))
209        .wrapping_add((a[1] as u128) * (b[2] as u128))
210        .wrapping_add((a[2] as u128) * (b[1] as u128))
211        .wrapping_add((a[3] as u128) * (b[0] as u128));
212    let v3 = (t as u64) & mask;
213    t >>= RADIX;
214
215    t = t
216        .wrapping_add((a[0] as u128) * (b[4] as u128))
217        .wrapping_add((a[1] as u128) * (b[3] as u128))
218        .wrapping_add((a[2] as u128) * (b[2] as u128))
219        .wrapping_add((a[3] as u128) * (b[1] as u128))
220        .wrapping_add((a[4] as u128) * (b[0] as u128))
221        .wrapping_add((v0 as u128) * p4);
222    let v4 = (t as u64) & mask;
223    t >>= RADIX;
224
225    t = t
226        .wrapping_add((a[1] as u128) * (b[4] as u128))
227        .wrapping_add((a[2] as u128) * (b[3] as u128))
228        .wrapping_add((a[3] as u128) * (b[2] as u128))
229        .wrapping_add((a[4] as u128) * (b[1] as u128))
230        .wrapping_add((v1 as u128) * p4);
231    c[0] = (t as u64) & mask;
232    t >>= RADIX;
233
234    t = t
235        .wrapping_add((a[2] as u128) * (b[4] as u128))
236        .wrapping_add((a[3] as u128) * (b[3] as u128))
237        .wrapping_add((a[4] as u128) * (b[2] as u128))
238        .wrapping_add((v2 as u128) * p4);
239    c[1] = (t as u64) & mask;
240    t >>= RADIX;
241
242    t = t
243        .wrapping_add((a[3] as u128) * (b[4] as u128))
244        .wrapping_add((a[4] as u128) * (b[3] as u128))
245        .wrapping_add((v3 as u128) * p4);
246    c[2] = (t as u64) & mask;
247    t >>= RADIX;
248
249    t = t
250        .wrapping_add((a[4] as u128) * (b[4] as u128))
251        .wrapping_add((v4 as u128) * p4);
252    c[3] = (t as u64) & mask;
253    t >>= RADIX;
254
255    c[4] = t as u64;
256}
257
258/// `c <- a * a mod 2p` (specialized squaring). Each cross-term
259/// `a_i * a_j` (`i != j`) is computed once and doubled, saving roughly
260/// half the partial-product work compared to a general multiply.
261#[inline]
262pub(crate) fn modsqr(c: &mut [u64; NLIMBS], a: &[u64; NLIMBS]) {
263    let mask = MASK;
264    let p4: u128 = P4 as u128;
265
266    let mut t: u128;
267    let mut tot: u128;
268
269    tot = (a[0] as u128) * (a[0] as u128);
270    t = tot;
271    let v0 = (t as u64) & mask;
272    t >>= RADIX;
273
274    tot = (a[0] as u128) * (a[1] as u128);
275    tot = tot.wrapping_mul(2);
276    t = t.wrapping_add(tot);
277    let v1 = (t as u64) & mask;
278    t >>= RADIX;
279
280    tot = (a[0] as u128) * (a[2] as u128);
281    tot = tot.wrapping_mul(2);
282    tot = tot.wrapping_add((a[1] as u128) * (a[1] as u128));
283    t = t.wrapping_add(tot);
284    let v2 = (t as u64) & mask;
285    t >>= RADIX;
286
287    tot = (a[0] as u128) * (a[3] as u128);
288    tot = tot.wrapping_add((a[1] as u128) * (a[2] as u128));
289    tot = tot.wrapping_mul(2);
290    t = t.wrapping_add(tot);
291    let v3 = (t as u64) & mask;
292    t >>= RADIX;
293
294    tot = (a[0] as u128) * (a[4] as u128);
295    tot = tot.wrapping_add((a[1] as u128) * (a[3] as u128));
296    tot = tot.wrapping_mul(2);
297    tot = tot.wrapping_add((a[2] as u128) * (a[2] as u128));
298    t = t.wrapping_add(tot);
299    t = t.wrapping_add((v0 as u128) * p4);
300    let v4 = (t as u64) & mask;
301    t >>= RADIX;
302
303    tot = (a[1] as u128) * (a[4] as u128);
304    tot = tot.wrapping_add((a[2] as u128) * (a[3] as u128));
305    tot = tot.wrapping_mul(2);
306    t = t.wrapping_add(tot);
307    t = t.wrapping_add((v1 as u128) * p4);
308    c[0] = (t as u64) & mask;
309    t >>= RADIX;
310
311    tot = (a[2] as u128) * (a[4] as u128);
312    tot = tot.wrapping_mul(2);
313    tot = tot.wrapping_add((a[3] as u128) * (a[3] as u128));
314    t = t.wrapping_add(tot);
315    t = t.wrapping_add((v2 as u128) * p4);
316    c[1] = (t as u64) & mask;
317    t >>= RADIX;
318
319    tot = (a[3] as u128) * (a[4] as u128);
320    tot = tot.wrapping_mul(2);
321    t = t.wrapping_add(tot);
322    t = t.wrapping_add((v3 as u128) * p4);
323    c[2] = (t as u64) & mask;
324    t >>= RADIX;
325
326    tot = (a[4] as u128) * (a[4] as u128);
327    t = t.wrapping_add(tot);
328    t = t.wrapping_add((v4 as u128) * p4);
329    c[3] = (t as u64) & mask;
330    t >>= RADIX;
331
332    c[4] = t as u64;
333}
334
335/// `c <- a`.
336#[inline]
337pub(crate) fn modcpy(c: &mut [u64; NLIMBS], a: &[u64; NLIMBS]) {
338    *c = *a;
339}
340
341/// Square `a` in-place `n` times.
342#[inline]
343pub(crate) fn modnsqr(a: &mut [u64; NLIMBS], n: u32) {
344    for _ in 0..n {
345        let mut tmp = [0u64; NLIMBS];
346        modsqr(&mut tmp, a);
347        *a = tmp;
348    }
349}
350
351/// Square root progenitor: `z <- w^((p-3)/4) mod p` via a fixed
352/// addition chain whose structure encodes the binary representation
353/// of `(p-3)/4` for this prime.
354#[inline]
355pub(crate) fn modpro(z: &mut [u64; NLIMBS], w: &[u64; NLIMBS]) {
356    let mut x = [0u64; NLIMBS];
357    let mut t0 = [0u64; NLIMBS];
358    let mut t1 = [0u64; NLIMBS];
359    let mut t2 = [0u64; NLIMBS];
360    let mut t3 = [0u64; NLIMBS];
361    let mut t4 = [0u64; NLIMBS];
362
363    modcpy(&mut x, w);
364    modsqr(z, &x);
365    modmul(&mut t0, &x, z);
366    modsqr(z, &t0);
367    {
368        let mut tmp = [0u64; NLIMBS];
369        modmul(&mut tmp, &x, z);
370        *z = tmp;
371    }
372    modsqr(&mut t1, z);
373    modsqr(&mut t3, &t1);
374    modsqr(&mut t2, &t3);
375    modcpy(&mut t4, &t2);
376    modnsqr(&mut t4, 3);
377    {
378        let mut tmp = [0u64; NLIMBS];
379        modmul(&mut tmp, &t2, &t4);
380        t2 = tmp;
381    }
382    modcpy(&mut t4, &t2);
383    modnsqr(&mut t4, 6);
384    {
385        let mut tmp = [0u64; NLIMBS];
386        modmul(&mut tmp, &t2, &t4);
387        t2 = tmp;
388    }
389    modcpy(&mut t4, &t2);
390    modnsqr(&mut t4, 2);
391    {
392        let mut tmp = [0u64; NLIMBS];
393        modmul(&mut tmp, &t3, &t4);
394        t3 = tmp;
395    }
396    modnsqr(&mut t3, 13);
397    {
398        let mut tmp = [0u64; NLIMBS];
399        modmul(&mut tmp, &t2, &t3);
400        t2 = tmp;
401    }
402    modcpy(&mut t3, &t2);
403    modnsqr(&mut t3, 27);
404    {
405        let mut tmp = [0u64; NLIMBS];
406        modmul(&mut tmp, &t2, &t3);
407        t2 = tmp;
408    }
409    {
410        let mut tmp = [0u64; NLIMBS];
411        modmul(&mut tmp, z, &t2);
412        *z = tmp;
413    }
414    modcpy(&mut t2, z);
415    modnsqr(&mut t2, 4);
416    {
417        let mut tmp = [0u64; NLIMBS];
418        modmul(&mut tmp, &t1, &t2);
419        t1 = tmp;
420    }
421    {
422        let mut tmp = [0u64; NLIMBS];
423        modmul(&mut tmp, &t0, &t1);
424        t0 = tmp;
425    }
426    {
427        let mut tmp = [0u64; NLIMBS];
428        modmul(&mut tmp, &t1, &t0);
429        t1 = tmp;
430    }
431    {
432        let mut tmp = [0u64; NLIMBS];
433        modmul(&mut tmp, &t0, &t1);
434        t0 = tmp;
435    }
436    {
437        let mut tmp = [0u64; NLIMBS];
438        modmul(&mut tmp, &t1, &t0);
439        t2 = tmp;
440    }
441    {
442        let mut tmp = [0u64; NLIMBS];
443        modmul(&mut tmp, &t0, &t2);
444        t0 = tmp;
445    }
446    {
447        let mut tmp = [0u64; NLIMBS];
448        modmul(&mut tmp, &t1, &t0);
449        t1 = tmp;
450    }
451    modnsqr(&mut t1, 63);
452    {
453        let mut tmp = [0u64; NLIMBS];
454        modmul(&mut tmp, &t0, &t1);
455        t1 = tmp;
456    }
457    modnsqr(&mut t1, 64);
458    {
459        let mut tmp = [0u64; NLIMBS];
460        modmul(&mut tmp, &t0, &t1);
461        t0 = tmp;
462    }
463    modnsqr(&mut t0, 57);
464    {
465        let mut tmp = [0u64; NLIMBS];
466        modmul(&mut tmp, z, &t0);
467        *z = tmp;
468    }
469}
470
471/// `modinv`: `z <- 1 / x mod p`. If `h` is `Some(h)` the precomputed
472/// progenitor `x^((p-3)/4)` is used; otherwise computed via `modpro`.
473#[inline]
474pub(crate) fn modinv(z: &mut [u64; NLIMBS], x: &[u64; NLIMBS], h: Option<&[u64; NLIMBS]>) {
475    let mut s = [0u64; NLIMBS];
476    let mut t = [0u64; NLIMBS];
477    match h {
478        None => modpro(&mut t, x),
479        Some(h) => modcpy(&mut t, h),
480    }
481    modcpy(&mut s, x);
482    modnsqr(&mut t, 2);
483    modmul(z, &s, &t);
484}
485
486/// Convert from canonical integer form to internal Montgomery form
487/// (multiplies by `R2_NRES` and divides by Montgomery `R`).
488#[inline]
489pub(crate) fn nres(n: &mut [u64; NLIMBS], m: &[u64; NLIMBS]) {
490    modmul(n, m, &R2_NRES);
491}
492
493/// Convert from internal Montgomery form back to canonical integer
494/// form, fully reducing modulo `p`.
495#[inline]
496pub(crate) fn redc(m: &mut [u64; NLIMBS], n: &[u64; NLIMBS]) {
497    let mut c = [0u64; NLIMBS];
498    c[0] = 1;
499    modmul(m, n, &c);
500    let _ = modfsb(m);
501}
502
503/// Returns `1` if `a == 0 mod p`, `0` otherwise.
504#[inline]
505pub(crate) fn modis0(a: &[u64; NLIMBS]) -> u32 {
506    let mut c = [0u64; NLIMBS];
507    redc(&mut c, a);
508    let mut d: u64 = 0;
509    for limb in c.iter() {
510        d |= *limb;
511    }
512    ((1u64) & ((d.wrapping_sub(1)) >> RADIX)) as u32
513}
514
515/// Returns `1` if `a == 1 mod p`, `0` otherwise.
516#[inline]
517pub(crate) fn modis1(a: &[u64; NLIMBS]) -> u32 {
518    let mut c = [0u64; NLIMBS];
519    redc(&mut c, a);
520    let mut d: u64 = 0;
521    for limb in c.iter().skip(1) {
522        d |= *limb;
523    }
524    let c0 = c[0];
525    ((1u64) & ((d.wrapping_sub(1)) >> RADIX) & (((c0 ^ 1).wrapping_sub(1)) >> RADIX)) as u32
526}
527
528/// Returns `1` if `a == b mod p`, `0` otherwise. Constant-time: folds
529/// the per-limb differences into a single bit using masked XOR.
530#[inline]
531pub(crate) fn modcmp(a: &[u64; NLIMBS], b: &[u64; NLIMBS]) -> u32 {
532    let mut c = [0u64; NLIMBS];
533    let mut d = [0u64; NLIMBS];
534    redc(&mut c, a);
535    redc(&mut d, b);
536    let mut eq: u64 = 1;
537    for i in 0..NLIMBS {
538        eq &= ((c[i] ^ d[i]).wrapping_sub(1)) >> RADIX;
539    }
540    eq &= 1;
541    eq as u32
542}
543
544/// Returns `1` if `x` is a quadratic residue (or zero), `0`
545/// otherwise. If `h` is `Some`, it is interpreted as the precomputed
546/// progenitor `x^((p-3)/4)`; otherwise the progenitor is computed.
547#[inline]
548pub(crate) fn modqr(x: &[u64; NLIMBS], h: Option<&[u64; NLIMBS]>) -> u32 {
549    let mut r = [0u64; NLIMBS];
550    match h {
551        None => {
552            modpro(&mut r, x);
553            let mut r2 = [0u64; NLIMBS];
554            modsqr(&mut r2, &r);
555            r = r2;
556        }
557        Some(h) => {
558            modsqr(&mut r, h);
559        }
560    }
561    let mut r2 = [0u64; NLIMBS];
562    modmul(&mut r2, &r, x);
563    let r = r2;
564    modis1(&r) | modis0(x)
565}
566
567/// `r <- sqrt(x) mod p`. Since `p = 3 mod 4`, the square root is
568/// `x^((p+1)/4) = x * x^((p-3)/4)`, so the progenitor (cached as `h`
569/// when available) is multiplied by `x` to get the answer. Output is
570/// well-defined modulo a sign.
571#[inline]
572pub(crate) fn modsqrt(r: &mut [u64; NLIMBS], x: &[u64; NLIMBS], h: Option<&[u64; NLIMBS]>) {
573    let mut y = [0u64; NLIMBS];
574    let mut s = [0u64; NLIMBS];
575    match h {
576        None => modpro(&mut y, x),
577        Some(h) => modcpy(&mut y, h),
578    }
579    modmul(&mut s, &y, x);
580    modcpy(r, &s);
581}
582
583/// Set `a` to the small integer `x` in internal Montgomery form.
584#[inline]
585pub(crate) fn modint(a: &mut [u64; NLIMBS], x: u64) {
586    a[0] = x;
587    a[1] = 0;
588    a[2] = 0;
589    a[3] = 0;
590    a[4] = 0;
591    let a_copy = *a;
592    nres(a, &a_copy);
593}
594
595/// `c <- a * x mod 2p` for a small integer `x`.
596#[inline]
597pub(crate) fn modmli(c: &mut [u64; NLIMBS], a: &[u64; NLIMBS], x: u64) {
598    let mut t = [0u64; NLIMBS];
599    modint(&mut t, x);
600    modmul(c, a, &t);
601}
602
603/// Shift `a` left by `n < 51` bits (per-limb, with carry into the
604/// next limb).
605#[inline]
606pub(crate) fn modshl(a: &mut [u64; NLIMBS], n: u32) {
607    a[4] = (a[4] << n) | (a[3] >> (RADIX - n));
608    for i in (1..=3).rev() {
609        a[i] = ((a[i] << n) & MASK) | (a[i - 1] >> (RADIX - n));
610    }
611    a[0] = (a[0] << n) & MASK;
612}
613
614/// Shift `a` right by `n < 51` bits, returning the low `n`
615/// shifted-out bits.
616#[inline]
617pub(crate) fn modshr(a: &mut [u64; NLIMBS], n: u32) -> u64 {
618    let r = a[0] & ((1u64 << n) - 1);
619    for i in 0..4 {
620        a[i] = (a[i] >> n) | ((a[i + 1] << (RADIX - n)) & MASK);
621    }
622    a[4] >>= n;
623    r
624}
625
626/// Constant-time conditional swap of `f` and `g` if `b == 1`, no-op
627/// if `b == 0`.
628#[inline]
629pub(crate) fn modcsw(b: u64, g: &mut [u64; NLIMBS], f: &mut [u64; NLIMBS]) {
630    let r: u64 = 0x3cc3_c33c_5aa5_a55a;
631    let c0 = (1u64.wrapping_sub(b)).wrapping_add(r);
632    let c1 = b.wrapping_add(r);
633    for i in 0..NLIMBS {
634        let s = g[i];
635        let t = f[i];
636        let w = r.wrapping_mul(t.wrapping_add(s));
637        let new_f = c0
638            .wrapping_mul(t)
639            .wrapping_add(c1.wrapping_mul(s))
640            .wrapping_sub(w);
641        let new_g = c0
642            .wrapping_mul(s)
643            .wrapping_add(c1.wrapping_mul(t))
644            .wrapping_sub(w);
645        f[i] = new_f;
646        g[i] = new_g;
647    }
648}
649
650/// Bytes per encoded Fp element for Level 1.
651const ENCODED_BYTES: usize = 32;
652
653/// Serialize `a` (in internal Montgomery form) to its canonical
654/// 32-byte little-endian representation: convert back to canonical
655/// integer form via [`redc`], then peel off one byte at a time.
656#[inline]
657pub(crate) fn fp_encode(out: &mut [u8], a: &[u64; NLIMBS]) {
658    debug_assert!(out.len() >= ENCODED_BYTES);
659    let mut c = [0u64; NLIMBS];
660    redc(&mut c, a);
661    for byte in out.iter_mut().take(ENCODED_BYTES) {
662        *byte = (c[0] & 0xff) as u8;
663        let _ = modshr(&mut c, 8);
664    }
665}
666
667/// Parse 32 canonical little-endian bytes into an Fp element.
668/// Returns `0xFFFF_FFFF` on in-range input, `0` otherwise; on failure
669/// `out` is zeroed.
670#[inline]
671pub(crate) fn fp_decode(out: &mut [u64; NLIMBS], bytes: &[u8]) -> u32 {
672    if bytes.len() < ENCODED_BYTES {
673        *out = [0; NLIMBS];
674        return 0;
675    }
676    *out = [0; NLIMBS];
677    // Build the integer by shifting in one byte at a time, MSB first.
678    for i in (0..ENCODED_BYTES).rev() {
679        modshl(out, 8);
680        out[0] = out[0].wrapping_add(bytes[i] as u64);
681    }
682    // res is all-ones if the value was in `[0, p)`, all-zeros otherwise.
683    let res_u64 = 0u64.wrapping_sub(modfsb(out) as u64);
684    let res_u32 = res_u64 as u32;
685    let out_copy = *out;
686    nres(out, &out_copy);
687    for limb in out.iter_mut() {
688        *limb &= res_u64;
689    }
690    res_u32
691}
692
693/// Partial reduction of a 4-limb 64-bit-saturated accumulator: split
694/// off the top byte of limb 3 and fold it back into the low limbs
695/// using the identity `5 * 2^248 = 1 (mod p)`. Used by
696/// [`fp_decode_reduce`] to absorb each 32-byte chunk of a long input.
697fn partial_reduce_4(out: &mut [u64; 4], src: &[u64; 4]) {
698    let h = src[3] >> 56;
699    let l = src[3] & 0x00FF_FFFF_FFFF_FFFF;
700    // Add floor(h/5) + (h mod 5) * 2^248 to the low part.
701    let quo = (h.wrapping_mul(0xCD)) >> 10;
702    let rem = h.wrapping_sub(5u64.wrapping_mul(quo));
703    let (r0, c0) = src[0].overflowing_add(quo);
704    let (r1, c1) = src[1].overflowing_add(c0 as u64);
705    let (r2, c2) = src[2].overflowing_add(c1 as u64);
706    let r3 = l.wrapping_add(rem << 56).wrapping_add(c2 as u64);
707    out[0] = r0;
708    out[1] = r1;
709    out[2] = r2;
710    out[3] = r3;
711}
712
713/// Parse a little-endian byte string of arbitrary length, reducing it
714/// modulo `p`. Always succeeds. Used to map hash output uniformly into
715/// Fp.
716#[inline]
717pub(crate) fn fp_decode_reduce(out: &mut [u64; NLIMBS], bytes: &[u8]) {
718    *out = [0; NLIMBS];
719    if bytes.is_empty() {
720        return;
721    }
722
723    let mut len = bytes.len();
724    let rem = len % ENCODED_BYTES;
725    if rem != 0 {
726        // Decode a partial trailing block (zero-padded to 32 bytes;
727        // the value is already < p so no reduction is needed).
728        let k = len - rem;
729        let mut tmp = [0u8; ENCODED_BYTES];
730        tmp[..(len - k)].copy_from_slice(&bytes[k..]);
731        let _ = fp_decode(out, &tmp);
732        len = k;
733    }
734
735    while len > 0 {
736        // Shift the accumulator left by 2^256 in the Montgomery sense:
737        // multiplying by `R2` (which represents 2^256) makes room for
738        // the next 32-byte chunk in the high bits.
739        let out_copy = *out;
740        modmul(out, &out_copy, &R2);
741        len -= ENCODED_BYTES;
742        let mut t = [0u64; 4];
743        for (j, t_j) in t.iter_mut().enumerate() {
744            let off = len + j * 8;
745            // SAFETY: slice length is exactly 8, matching the [u8; 8] target
746            *t_j = u64::from_le_bytes(
747                bytes[off..off + 8]
748                    .try_into()
749                    .expect("invariant: slice length is exactly 8"),
750            );
751        }
752        let mut reduced = [0u64; 4];
753        partial_reduce_4(&mut reduced, &t);
754        let mut tmp_bytes = [0u8; ENCODED_BYTES];
755        for (j, r) in reduced.iter().enumerate() {
756            tmp_bytes[j * 8..(j + 1) * 8].copy_from_slice(&r.to_le_bytes());
757        }
758        let mut a = [0u64; NLIMBS];
759        let _ = fp_decode(&mut a, &tmp_bytes);
760        let out_copy = *out;
761        modadd(out, &out_copy, &a);
762    }
763}
764
765/// Borrow a `&Array<u64, U5>` as `&[u64; 5]` for the limb-level helpers.
766#[inline]
767fn as_arr(a: &Array<u64, <Level1 as crate::params::SecurityLevel>::FpLimbs>) -> &[u64; NLIMBS] {
768    <&[u64; NLIMBS]>::try_from(&a[..]).expect("invariant: Level1 FpLimbs == U5")
769}
770
771/// Mutable borrow of `&mut Array<u64, U5>` as `&mut [u64; 5]`.
772#[inline]
773fn as_arr_mut(
774    a: &mut Array<u64, <Level1 as crate::params::SecurityLevel>::FpLimbs>,
775) -> &mut [u64; NLIMBS] {
776    <&mut [u64; NLIMBS]>::try_from(&mut a[..]).expect("invariant: Level1 FpLimbs == U5")
777}
778
779impl FpBackend for Level1 {
780    #[inline]
781    fn set_zero(out: &mut Array<u64, Self::FpLimbs>) {
782        *as_arr_mut(out) = ZERO;
783    }
784
785    #[inline]
786    fn set_one(out: &mut Array<u64, Self::FpLimbs>) {
787        *as_arr_mut(out) = ONE;
788    }
789
790    #[inline]
791    fn set_small(out: &mut Array<u64, Self::FpLimbs>, val: u64) {
792        modint(as_arr_mut(out), val);
793    }
794
795    #[inline]
796    fn is_equal(a: &Array<u64, Self::FpLimbs>, b: &Array<u64, Self::FpLimbs>) -> Choice {
797        Choice::from(modcmp(as_arr(a), as_arr(b)) as u8)
798    }
799
800    #[inline]
801    fn is_zero(a: &Array<u64, Self::FpLimbs>) -> Choice {
802        Choice::from(modis0(as_arr(a)) as u8)
803    }
804
805    #[inline]
806    fn copy(out: &mut Array<u64, Self::FpLimbs>, a: &Array<u64, Self::FpLimbs>) {
807        modcpy(as_arr_mut(out), as_arr(a));
808    }
809
810    #[inline]
811    fn add(
812        out: &mut Array<u64, Self::FpLimbs>,
813        a: &Array<u64, Self::FpLimbs>,
814        b: &Array<u64, Self::FpLimbs>,
815    ) {
816        modadd(as_arr_mut(out), as_arr(a), as_arr(b));
817    }
818
819    #[inline]
820    fn sub(
821        out: &mut Array<u64, Self::FpLimbs>,
822        a: &Array<u64, Self::FpLimbs>,
823        b: &Array<u64, Self::FpLimbs>,
824    ) {
825        modsub(as_arr_mut(out), as_arr(a), as_arr(b));
826    }
827
828    #[inline]
829    fn neg(out: &mut Array<u64, Self::FpLimbs>, a: &Array<u64, Self::FpLimbs>) {
830        modneg(as_arr_mut(out), as_arr(a));
831    }
832
833    #[inline]
834    fn mul(
835        out: &mut Array<u64, Self::FpLimbs>,
836        a: &Array<u64, Self::FpLimbs>,
837        b: &Array<u64, Self::FpLimbs>,
838    ) {
839        modmul(as_arr_mut(out), as_arr(a), as_arr(b));
840    }
841
842    #[inline]
843    fn sqr(out: &mut Array<u64, Self::FpLimbs>, a: &Array<u64, Self::FpLimbs>) {
844        modsqr(as_arr_mut(out), as_arr(a));
845    }
846
847    #[inline]
848    fn inv(out: &mut Array<u64, Self::FpLimbs>, a: &Array<u64, Self::FpLimbs>) {
849        let a_copy = *as_arr(a);
850        modinv(as_arr_mut(out), &a_copy, None);
851    }
852
853    #[inline]
854    fn sqrt(out: &mut Array<u64, Self::FpLimbs>, a: &Array<u64, Self::FpLimbs>) {
855        let a_copy = *as_arr(a);
856        modsqrt(as_arr_mut(out), &a_copy, None);
857    }
858
859    #[inline]
860    fn is_square(a: &Array<u64, Self::FpLimbs>) -> Choice {
861        Choice::from(modqr(as_arr(a), None) as u8)
862    }
863
864    #[inline]
865    fn half(out: &mut Array<u64, Self::FpLimbs>, a: &Array<u64, Self::FpLimbs>) {
866        modmul(as_arr_mut(out), &TWO_INV, as_arr(a));
867    }
868
869    #[inline]
870    fn div3(out: &mut Array<u64, Self::FpLimbs>, a: &Array<u64, Self::FpLimbs>) {
871        modmul(as_arr_mut(out), &THREE_INV, as_arr(a));
872    }
873
874    #[inline]
875    fn exp3div4(out: &mut Array<u64, Self::FpLimbs>, a: &Array<u64, Self::FpLimbs>) {
876        modpro(as_arr_mut(out), as_arr(a));
877    }
878
879    #[inline]
880    fn mul_small(out: &mut Array<u64, Self::FpLimbs>, a: &Array<u64, Self::FpLimbs>, val: u32) {
881        modmli(as_arr_mut(out), as_arr(a), val as u64);
882    }
883
884    #[inline]
885    fn encode(out: &mut [u8], a: &Array<u64, Self::FpLimbs>) {
886        fp_encode(out, as_arr(a));
887    }
888
889    #[inline]
890    fn decode(out: &mut Array<u64, Self::FpLimbs>, bytes: &[u8]) -> Choice {
891        Choice::from((fp_decode(as_arr_mut(out), bytes) & 1) as u8)
892    }
893
894    #[inline]
895    fn decode_reduce(out: &mut Array<u64, Self::FpLimbs>, bytes: &[u8]) {
896        fp_decode_reduce(as_arr_mut(out), bytes);
897    }
898
899    #[inline]
900    fn cswap(a: &mut Array<u64, Self::FpLimbs>, b: &mut Array<u64, Self::FpLimbs>, ctl: Choice) {
901        modcsw(ctl.unwrap_u8() as u64, as_arr_mut(a), as_arr_mut(b));
902    }
903
904    #[inline]
905    fn select(
906        out: &mut Array<u64, Self::FpLimbs>,
907        a0: &Array<u64, Self::FpLimbs>,
908        a1: &Array<u64, Self::FpLimbs>,
909        ctl: Choice,
910    ) {
911        let cw = 0u64.wrapping_sub(ctl.unwrap_u8() as u64);
912        let a0r = as_arr(a0);
913        let a1r = as_arr(a1);
914        let out_r = as_arr_mut(out);
915        for i in 0..NLIMBS {
916            out_r[i] = a0r[i] ^ (cw & (a0r[i] ^ a1r[i]));
917        }
918    }
919}