Skip to main content

crypto_bigint/uint/
div_limb.rs

1//! Implementation of constant-time division via reciprocal precomputation, as described in
2//! "Improved Division by Invariant Integers" by Niels Möller and Torbjorn Granlund
3//! (DOI: 10.1109/TC.2010.143, <https://gmplib.org/~tege/division-paper.pdf>).
4
5use crate::{
6    Choice, CtSelect, Limb, NonZero, Uint, WideWord, Word,
7    primitives::{addhilo, widening_mul},
8    word,
9};
10
11cpubits::cpubits! {
12    32 => {
13        /// Calculates the reciprocal of the given 32-bit divisor with the highmost bit set.
14        pub const fn reciprocal(d: Word) -> Word {
15            debug_assert!(d >= (1 << (Word::BITS - 1)));
16
17            let d0 = d & 1;
18            let d10 = d >> 22;
19            let d21 = (d >> 11) + 1;
20            let d31 = (d >> 1) + d0;
21            let v0 = short_div((1 << 24) - (1 << 14) + (1 << 9), 24, d10, 10);
22            let (_lo, hi) = widening_mul(v0 * v0, d21);
23            let v1 = (v0 << 4) - hi - 1;
24
25            // Checks that the expression for `e` can be simplified in the way we did below.
26            debug_assert!(widening_mul(v1, d31).1 == (1 << 16) - 1);
27            let e = Word::MAX - v1.wrapping_mul(d31) + 1 + (v1 >> 1) * d0;
28
29            let (_lo, hi) = widening_mul(v1, e);
30            // Note: the paper does not mention a wrapping add here,
31            // but the 64-bit version has it at this stage, and the function panics without it
32            // when calculating a reciprocal for `Word::MAX`.
33            let v2 = (v1 << 15).wrapping_add(hi >> 1);
34
35            // The paper has `(v2 + 1) * d / 2^32` (there's another 2^32, but it's accounted for later).
36            // If `v2 == 2^32-1` this should give `d`, but we can't achieve this in our wrapping arithmetic.
37            // Hence the `ct_select()`.
38            let x = v2.wrapping_add(1);
39            let (_lo, hi) = widening_mul(x, d);
40            let hi = word::select(d, hi, Choice::from_u32_nz(x));
41
42            v2.wrapping_sub(hi).wrapping_sub(d)
43        }
44    }
45    64 => {
46        /// Calculates the reciprocal of the given 64-bit divisor with the highmost bit set.
47        pub const fn reciprocal(d: Word) -> Word {
48            debug_assert!(d >= (1 << (Word::BITS - 1)));
49
50            let d0 = d & 1;
51            let d9 = d >> 55;
52            let d40 = (d >> 24) + 1;
53            let d63 = (d >> 1) + d0;
54            let v0 = short_div((1 << 19) - 3 * (1 << 8), 19, d9 as u32, 9) as u64;
55            let v1 = (v0 << 11) - ((v0 * v0 * d40) >> 40) - 1;
56            let v2 = (v1 << 13) + ((v1 * ((1 << 60) - v1 * d40)) >> 47);
57
58            // Checks that the expression for `e` can be simplified in the way we did below.
59            debug_assert!(widening_mul(v2, d63).1 == (1 << 32) - 1);
60            let e = Word::MAX - v2.wrapping_mul(d63) + 1 + (v2 >> 1) * d0;
61
62            let (_lo, hi) = widening_mul(v2, e);
63            let v3 = (v2 << 31).wrapping_add(hi >> 1);
64
65            // The paper has `(v3 + 1) * d / 2^64` (there's another 2^64, but it's accounted for later).
66            // If `v3 == 2^64-1` this should give `d`, but we can't achieve this in our wrapping arithmetic.
67            // Hence the `ct_select()`.
68            let x = v3.wrapping_add(1);
69            let (_lo, hi) = widening_mul(x, d);
70            let hi = word::select(d, hi, word::choice_from_nz(x));
71
72            v3.wrapping_sub(hi).wrapping_sub(d)
73        }
74    }
75}
76
77/// Calculates `dividend / divisor`, given `dividend` and `divisor`
78/// along with their maximum bitsizes.
79#[inline(always)]
80const fn short_div(mut dividend: u32, dividend_bits: u32, divisor: u32, divisor_bits: u32) -> u32 {
81    // TODO: this may be sped up even more using the fact that `dividend` is a known constant.
82
83    // In the paper this is a table lookup, but since we want it to be constant-time,
84    // we have to access all the elements of the table, which is quite large.
85    // So this shift-and-subtract approach is actually faster.
86
87    // Passing `dividend_bits` and `divisor_bits` because calling `.leading_zeros()`
88    // causes a significant slowdown, and we know those values anyway.
89
90    let mut divisor = divisor << (dividend_bits - divisor_bits);
91    let mut quotient: u32 = 0;
92    let mut i = dividend_bits - divisor_bits + 1;
93
94    while i > 0 {
95        i -= 1;
96        let bit = Choice::from_u32_lt(dividend, divisor);
97        dividend = bit.select_u32(dividend.wrapping_sub(divisor), dividend);
98        divisor >>= 1;
99        quotient |= bit.not().select_u32(0, 1 << i);
100    }
101
102    quotient
103}
104
105/// Calculate the quotient and the remainder of the division of a wide word
106/// (supplied as high and low words) by `d`, with a precalculated reciprocal `v`.
107#[inline(always)]
108pub(crate) const fn div2by1(u1: Word, u0: Word, reciprocal: &Reciprocal) -> (Word, Word) {
109    let d = reciprocal.divisor_normalized;
110
111    debug_assert!(d >= (1 << (Word::BITS - 1)));
112    debug_assert!(u1 < d);
113
114    let (q0, q1) = widening_mul(reciprocal.reciprocal, u1);
115    let (q0, q1) = addhilo(q0, q1, u0, u1);
116    let q1 = q1.wrapping_add(1);
117    let r = u0.wrapping_sub(q1.wrapping_mul(d));
118
119    let r_gt_q0 = word::choice_from_lt(q0, r);
120    let q1 = word::select(q1, q1.wrapping_sub(1), r_gt_q0);
121    let r = word::select(r, r.wrapping_add(d), r_gt_q0);
122
123    // If this was a normal `if`, we wouldn't need wrapping ops, because there would be no overflow.
124    // But since we calculate both results either way, we have to wrap.
125    // Added an assert to still check the lack of overflow in debug mode.
126    debug_assert!(r < d || q1 < Word::MAX);
127    let r_ge_d = word::choice_from_le(d, r);
128    let q1 = word::select(q1, q1.wrapping_add(1), r_ge_d);
129    let r = word::select(r, r.wrapping_sub(d), r_ge_d);
130
131    (q1, r)
132}
133
134/// Given two long integers `u = (..., u0, u1, u2)` and `v = (..., v0, v1)`
135/// (where `u2` and `v1` are the most significant limbs), where `floor(u / v) <= Limb::MAX`,
136/// calculates `q` such that `q - 1 <= floor(u / v) <= q`.
137/// In place of `v1` takes its reciprocal, and assumes that `v` was already pre-shifted
138/// so that v1 has its most significant bit set (that is, the reciprocal's `shift` is 0).
139#[inline(always)]
140pub(crate) const fn div3by2(
141    u2: Word,
142    u1: Word,
143    u0: Word,
144    v1_reciprocal: &Reciprocal,
145    v0: Word,
146) -> Word {
147    debug_assert!(v1_reciprocal.shift == 0);
148    debug_assert!(u2 <= v1_reciprocal.divisor_normalized);
149
150    // This method corresponds to Algorithm Q:
151    // https://janmr.com/blog/2014/04/basic-multiple-precision-long-division/
152
153    let q_maxed = word::choice_from_eq(u2, v1_reciprocal.divisor_normalized);
154    let (mut quo, rem) = div2by1(word::select(u2, 0, q_maxed), u1, v1_reciprocal);
155    // When the leading dividend word equals the leading divisor word, cap the quotient
156    // at Word::MAX and set the remainder to the sum of the top dividend words.
157    quo = word::select(quo, Word::MAX, q_maxed);
158    let mut rem = word::select_wide(
159        rem as WideWord,
160        (u2 as WideWord) + (u1 as WideWord),
161        q_maxed,
162    );
163
164    let mut i = 0;
165    while i < 2 {
166        let qy = (quo as WideWord) * (v0 as WideWord);
167        let rx = (rem << Word::BITS) | (u0 as WideWord);
168        // If r < b and q*y[-2] > r*x[-1], then set q = q - 1 and r = r + v1
169        let done =
170            word::choice_from_nz((rem >> Word::BITS) as Word).or(word::choice_from_wide_le(qy, rx));
171        quo = word::select(quo.wrapping_sub(1), quo, done);
172        rem = word::select_wide(
173            rem + (v1_reciprocal.divisor_normalized as WideWord),
174            rem,
175            done,
176        );
177        i += 1;
178    }
179
180    quo
181}
182
183/// A pre-calculated reciprocal for division by a single limb.
184#[derive(Copy, Clone, Debug, PartialEq, Eq)]
185pub struct Reciprocal {
186    divisor_normalized: Word,
187    shift: u32,
188    reciprocal: Word,
189}
190
191impl Reciprocal {
192    /// Pre-calculates a reciprocal for a known divisor,
193    /// to be used in the single-limb division later.
194    #[must_use]
195    pub const fn new(divisor: NonZero<Limb>) -> Self {
196        let divisor = divisor.0;
197
198        // Assuming this is constant-time for primitive types.
199        let shift = divisor.0.leading_zeros();
200
201        // Will not panic since divisor is non-zero
202        let divisor_normalized = divisor.0 << shift;
203
204        Self {
205            divisor_normalized,
206            shift,
207            reciprocal: reciprocal(divisor_normalized),
208        }
209    }
210
211    /// Returns a default instance of this object.
212    /// It is a self-consistent `Reciprocal` that will not cause panics in functions that take it.
213    ///
214    /// NOTE: intended for using it as a placeholder during compile-time array generation,
215    /// don't rely on the contents.
216    #[must_use]
217    pub const fn default() -> Self {
218        Self {
219            divisor_normalized: Word::MAX,
220            shift: 0,
221            // The result of calling `reciprocal(Word::MAX)`
222            // This holds both for 32- and 64-bit versions.
223            reciprocal: 1,
224        }
225    }
226
227    /// Get the shift value
228    #[must_use]
229    pub const fn shift(&self) -> u32 {
230        self.shift
231    }
232}
233
234impl CtSelect for Reciprocal {
235    fn ct_select(&self, other: &Self, choice: Choice) -> Self {
236        Self {
237            divisor_normalized: Word::ct_select(
238                &self.divisor_normalized,
239                &other.divisor_normalized,
240                choice,
241            ),
242            shift: u32::ct_select(&self.shift, &other.shift, choice),
243            reciprocal: Word::ct_select(&self.reciprocal, &other.reciprocal, choice),
244        }
245    }
246}
247
248// `CtOption.map()` needs this; for some reason it doesn't use the value it already has
249// for the `None` branch.
250impl Default for Reciprocal {
251    fn default() -> Self {
252        Self::default()
253    }
254}
255
256/// Divides `u` by the divisor encoded in the `reciprocal`, and returns the remainder.
257#[inline(always)]
258pub(crate) const fn rem_limb_with_reciprocal<const L: usize>(
259    u: &Uint<L>,
260    reciprocal: &Reciprocal,
261) -> Limb {
262    let (u_shifted, u_hi) = u.shl_limb_with_carry(reciprocal.shift, Limb::ZERO);
263    let mut r = u_hi.0;
264
265    let mut j = L;
266    while j > 0 {
267        j -= 1;
268        let (_, rj) = div2by1(r, u_shifted.as_limbs()[j].0, reciprocal);
269        r = rj;
270    }
271    Limb(r >> reciprocal.shift)
272}
273
274/// Computes `(a * b) % d`.
275#[inline(always)]
276pub(crate) const fn mul_rem(a: Limb, b: Limb, d: NonZero<Limb>) -> Limb {
277    let rec = Reciprocal::new(d);
278    let (lo, hi) = widening_mul(a.0, b.0);
279    rem_limb_with_reciprocal(&Uint::from_words([lo, hi]), &rec)
280}
281
282#[cfg(test)]
283mod tests {
284    use super::{Reciprocal, div2by1};
285    use crate::{Limb, NonZero, Word};
286    #[test]
287    fn div2by1_overflow() {
288        // A regression test for a situation when in div2by1() an operation (`q1 + 1`)
289        // that is protected from overflowing by a condition in the original paper (`r >= d`)
290        // still overflows because we're calculating the results for both branches.
291        let r = Reciprocal::new(NonZero::new(Limb(Word::MAX - 1)).unwrap());
292        assert_eq!(
293            div2by1(Word::MAX - 2, Word::MAX - 63, &r),
294            (Word::MAX, Word::MAX - 65)
295        );
296    }
297}