lambdaworks_math/unsigned_integer/
montgomery.rs

1use super::element::UnsignedInteger;
2
3pub struct MontgomeryAlgorithms;
4impl MontgomeryAlgorithms {
5    /// Compute CIOS multiplication of `a` * `b`
6    /// `q` is the modulus
7    /// `mu` is the inverse of -q modulo 2^{64}
8    /// Notice CIOS stands for Coarsely Integrated Operand Scanning
9    /// For more information see section 2.3.2 of Tolga Acar's thesis
10    /// https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf
11    #[inline(always)]
12    pub const fn cios<const NUM_LIMBS: usize>(
13        a: &UnsignedInteger<NUM_LIMBS>,
14        b: &UnsignedInteger<NUM_LIMBS>,
15        q: &UnsignedInteger<NUM_LIMBS>,
16        mu: &u64,
17    ) -> UnsignedInteger<NUM_LIMBS> {
18        let mut t = [0_u64; NUM_LIMBS];
19        let mut t_extra = [0_u64; 2];
20        let mut i: usize = NUM_LIMBS;
21        while i > 0 {
22            i -= 1;
23            // C := 0
24            let mut c: u128 = 0;
25
26            // for j=N-1 to 0
27            //    (C,t[j]) := t[j] + a[j]*b[i] + C
28            let mut cs: u128;
29            let mut j: usize = NUM_LIMBS;
30            while j > 0 {
31                j -= 1;
32                cs = t[j] as u128 + (a.limbs[j] as u128) * (b.limbs[i] as u128) + c;
33                c = cs >> 64;
34                t[j] = cs as u64;
35            }
36
37            // (t_extra[0],t_extra[1]) := t_extra[1] + C
38            cs = (t_extra[1] as u128) + c;
39            t_extra[0] = (cs >> 64) as u64;
40            t_extra[1] = cs as u64;
41
42            let mut c: u128;
43
44            // m := t[N-1]*q'[N-1] mod D
45            let m = ((t[NUM_LIMBS - 1] as u128 * *mu as u128) << 64) >> 64;
46
47            // (C,_) := t[N-1] + m*q[N-1]
48            c = (t[NUM_LIMBS - 1] as u128 + m * (q.limbs[NUM_LIMBS - 1] as u128)) >> 64;
49
50            // for j=N-1 to 1
51            //    (C,t[j+1]) := t[j] + m*q[j] + C
52            let mut j: usize = NUM_LIMBS - 1;
53            while j > 0 {
54                j -= 1;
55                cs = t[j] as u128 + m * (q.limbs[j] as u128) + c;
56                c = cs >> 64;
57                t[j + 1] = ((cs << 64) >> 64) as u64;
58            }
59
60            // (C,t[0]) := t_extra[1] + C
61            cs = (t_extra[1] as u128) + c;
62            c = cs >> 64;
63            t[0] = ((cs << 64) >> 64) as u64;
64
65            // t_extra[1] := t_extra[0] + C
66            t_extra[1] = t_extra[0] + c as u64;
67        }
68        let mut result = UnsignedInteger { limbs: t };
69
70        let overflow = t_extra[1] > 0;
71
72        if overflow || UnsignedInteger::const_le(q, &result) {
73            (result, _) = UnsignedInteger::sub(&result, q);
74        }
75        result
76    }
77
78    /// Compute CIOS multiplication of `a` * `b`
79    /// This is the Algorithm 2 described in the paper
80    /// "EdMSM: Multi-Scalar-Multiplication for SNARKs and Faster Montgomery multiplication"
81    /// https://eprint.iacr.org/2022/1400.pdf.
82    /// It is only suited for moduli with `q[0]` smaller than `2^63 - 1`.
83    /// `q` is the modulus
84    /// `mu` is the inverse of -q modulo 2^{64}
85    #[inline(always)]
86    pub fn cios_optimized_for_moduli_with_one_spare_bit<const NUM_LIMBS: usize>(
87        a: &UnsignedInteger<NUM_LIMBS>,
88        b: &UnsignedInteger<NUM_LIMBS>,
89        q: &UnsignedInteger<NUM_LIMBS>,
90        mu: &u64,
91    ) -> UnsignedInteger<NUM_LIMBS> {
92        let mut t = [0_u64; NUM_LIMBS];
93        let mut t_extra;
94        let mut i: usize = NUM_LIMBS;
95        while i > 0 {
96            i -= 1;
97            // C := 0
98            let mut c: u128 = 0;
99
100            // for j=N-1 to 0
101            //    (C,t[j]) := t[j] + a[j]*b[i] + C
102            let mut cs: u128;
103            let mut j: usize = NUM_LIMBS;
104            while j > 0 {
105                j -= 1;
106                cs = t[j] as u128 + (a.limbs[j] as u128) * (b.limbs[i] as u128) + c;
107                c = cs >> 64;
108                t[j] = cs as u64;
109            }
110
111            t_extra = c as u64;
112
113            let mut c: u128;
114
115            // m := t[N-1]*q'[N-1] mod D
116            let m = ((t[NUM_LIMBS - 1] as u128 * *mu as u128) << 64) >> 64;
117
118            // (C,_) := t[0] + m*q[0]
119            c = (t[NUM_LIMBS - 1] as u128 + m * (q.limbs[NUM_LIMBS - 1] as u128)) >> 64;
120
121            // for j=N-1 to 1
122            //    (C,t[j+1]) := t[j] + m*q[j] + C
123            let mut j: usize = NUM_LIMBS - 1;
124            while j > 0 {
125                j -= 1;
126                cs = t[j] as u128 + m * (q.limbs[j] as u128) + c;
127                c = cs >> 64;
128                t[j + 1] = ((cs << 64) >> 64) as u64;
129            }
130
131            // (C,t[0]) := t_extra + C
132            cs = (t_extra as u128) + c;
133            t[0] = ((cs << 64) >> 64) as u64;
134        }
135        let mut result = UnsignedInteger { limbs: t };
136
137        if UnsignedInteger::const_le(q, &result) {
138            (result, _) = UnsignedInteger::sub(&result, q);
139        }
140        result
141    }
142
143    // Separated Operand Scanning Method (2.3.1)
144    #[inline(always)]
145    pub fn sos_square<const NUM_LIMBS: usize>(
146        a: &UnsignedInteger<NUM_LIMBS>,
147        q: &UnsignedInteger<NUM_LIMBS>,
148        mu: &u64,
149    ) -> UnsignedInteger<NUM_LIMBS> {
150        // NOTE: we use explicit `while` loops in this function because profiling pointed
151        // at iterators of the form `(<x>..<y>).rev()` as the main performance bottleneck.
152
153        // Step 1: Compute `(hi, lo) = a * a`
154        let (mut hi, mut lo) = UnsignedInteger::square(a);
155
156        // Step 2: Add terms to `(hi, lo)` until multiple it
157        // is a multiple of both `2^{NUM_LIMBS * 64}` and
158        // `q`.
159        let mut c: u128 = 0;
160        let mut i = NUM_LIMBS;
161        let mut overflow = false;
162        while i > 0 {
163            i -= 1;
164            c = 0;
165            let m = (lo.limbs[i] as u128 * *mu as u128) as u64;
166            let mut j = NUM_LIMBS;
167            while j > 0 {
168                j -= 1;
169                if i + j >= NUM_LIMBS - 1 {
170                    let index = i + j - (NUM_LIMBS - 1);
171                    let cs = lo.limbs[index] as u128 + m as u128 * (q.limbs[j] as u128) + c;
172                    c = cs >> 64;
173                    lo.limbs[index] = cs as u64;
174                } else {
175                    let index = i + j + 1;
176                    let cs = hi.limbs[index] as u128 + m as u128 * (q.limbs[j] as u128) + c;
177                    c = cs >> 64;
178                    hi.limbs[index] = cs as u64;
179                }
180            }
181
182            // Carry propagation to `hi`
183            let mut t = 0;
184            while c > 0 && i >= t {
185                let cs = hi.limbs[i - t] as u128 + c;
186                c = cs >> 64;
187                hi.limbs[i - t] = cs as u64;
188                t += 1;
189            }
190            overflow |= c > 0;
191        }
192
193        // Step 3: At this point `overflow * 2^{2 * NUM_LIMBS * 64} + (hi, lo)` is a multiple
194        // of `2^{NUM_LIMBS * 64}` and the result is obtained by dividing it by `2^{NUM_LIMBS * 64}`.
195        // In other words, `lo` is zero and the result is
196        // `overflow * 2^{NUM_LIMBS * 64} + hi`.
197        // That number is always strictly smaller than `2 * q`. To normalize it we substract
198        // `q` whenever it is larger than `q`.
199        // The easy case is when `overflow` is zero. We just use the `sub` function.
200        // If `overflow` is 1, then `hi` is smaller than `q`. The function `sub(hi, q)` wraps
201        // around `2^{NUM_LIMBS * 64}`. This is the result we need.
202        overflow |= c > 0;
203        if overflow || UnsignedInteger::const_le(q, &hi) {
204            (hi, _) = UnsignedInteger::sub(&hi, q);
205        }
206        hi
207    }
208}
209
210#[cfg(test)]
211mod tests {
212    use crate::unsigned_integer::{element::U384, montgomery::MontgomeryAlgorithms};
213
214    use proptest::prelude::*;
215
216    proptest! {
217        #[test]
218        fn cios_vs_cios_optimized(a in any::<[u64; 6]>(), b in any::<[u64; 6]>()) {
219            let x = U384::from_limbs(a);
220            let y = U384::from_limbs(b);
221            let m = U384::from_hex_unchecked("cdb061954fdd36e5176f50dbdcfd349570a29ce1"); // this is prime
222            let mu: u64 = 16085280245840369887; // negative of the inverse of `m` modulo 2^{64}
223            assert_eq!(
224                MontgomeryAlgorithms::cios(&x, &y, &m, &mu),
225                MontgomeryAlgorithms::cios_optimized_for_moduli_with_one_spare_bit(&x, &y, &m, &mu)
226            );
227        }
228
229        #[test]
230        fn cios_vs_sos_square(a in any::<[u64; 6]>()) {
231            let x = U384::from_limbs(a);
232            let m = U384::from_hex_unchecked("cdb061954fdd36e5176f50dbdcfd349570a29ce1"); // this is prime
233            let mu: u64 = 16085280245840369887; // negative of the inverse of `m` modulo 2^{64}
234            assert_eq!(
235                MontgomeryAlgorithms::cios(&x, &x, &m, &mu),
236                MontgomeryAlgorithms::sos_square(&x, &m, &mu)
237            );
238        }
239    }
240    #[test]
241    fn montgomery_multiplication_works_0() {
242        let x = U384::from_u64(11_u64);
243        let y = U384::from_u64(10_u64);
244        let m = U384::from_u64(23_u64); //
245        let mu: u64 = 3208129404123400281; // negative of the inverse of `m` modulo 2^{64}.
246        let c = U384::from_u64(13_u64); // x * y * (r^{-1}) % m, where r = 2^{64 * 6} and r^{-1} mod m = 2.
247        assert_eq!(MontgomeryAlgorithms::cios(&x, &y, &m, &mu), c);
248    }
249
250    #[test]
251    fn montgomery_multiplication_works_1() {
252        let x = U384::from_hex_unchecked("05ed176deb0e80b4deb7718cdaa075165f149c");
253        let y = U384::from_hex_unchecked("5f103b0bd4397d4df560eb559f38353f80eeb6");
254        let m = U384::from_hex_unchecked("cdb061954fdd36e5176f50dbdcfd349570a29ce1"); // this is prime
255        let mu: u64 = 16085280245840369887; // negative of the inverse of `m` modulo 2^{64}
256        let c = U384::from_hex_unchecked("8d65cdee621682815d59f465d2641eea8a1274dc"); // x * y * (r^{-1}) % m, where r = 2^{64 * 6}
257        assert_eq!(MontgomeryAlgorithms::cios(&x, &y, &m, &mu), c);
258    }
259
260    #[test]
261    fn montgomery_multiplication_works_2() {
262        let x = U384::from_hex_unchecked("8d65cdee621682815d59f465d2641eea8a1274dc");
263        let m = U384::from_hex_unchecked("cdb061954fdd36e5176f50dbdcfd349570a29ce1"); // this is prime
264        let r_mod_m = U384::from_hex_unchecked("58dfb0e1b3dd5e674bdcde4f42eb5533b8759d33");
265        let mu: u64 = 16085280245840369887; // negative of the inverse of `m` modulo 2^{64}
266        let c = U384::from_hex_unchecked("8d65cdee621682815d59f465d2641eea8a1274dc");
267        assert_eq!(MontgomeryAlgorithms::cios(&x, &r_mod_m, &m, &mu), c);
268    }
269}