Skip to main content

class_groups/crypto_bigint/
composition.rs

1//! Composition of binary quadratic forms in constant time.
2//!
3//! This is generic to the underlying container representing the numbers, allowing usage with both
4//! allocating and non-allocating backends. However, the module as a whole is in a weird place.
5//! Specifically, it optimizes some functions, and leverages incompleteness elsewhere (tailored to
6//! the specific special cases encountered during reduction), but the optimized functions are _not_
7//! a majority of the runtime (or even a notable part). Instead, the GCD, divisions are, and those
8//! are completely deferred. Additionally, despite being generic to the underlying containers and
9//! not explicitly allocating, this code does make frequent use of `clone` (presumably copies for
10//! elements represented on the stack) due to requiring a non-trivial amount of scratch variables.
11//!
12//! The important aspect of this code is that it's clearly bounded as necessary to verify the
13//! correctness of the implemented algortithm. Despite this, the implemented algorithm is not
14//! itself proven here, it being Algorithm 5.4.7 Composition of Positive Definite Forms from
15//! A Course in Computational Algebraic Number Theory by Henri Cohen (as commonly implemented in
16//! its optimized variant as "NUCOMP"). Per a remark, and also Chapter 5, Exercise 9, the composite
17//! of two primitive forms is itself primitive.
18
19use crypto_bigint::{CtEq, CtSelect as _, CtAssign, Choice, Limb};
20
21use super::I;
22
23/// Calculate half the sum of two signed integers which are congruent modulo two.
24///
25/// This is equivalent to `(a + b) / 2`, which only has an integer solution when
26/// $a \cong b \mod 2$. A wrapping implementation of `a + b`, then divided by two, may be incorrect
27/// (due to truncating the highest bit). This ensures a correct solution via the methodology of
28/// `(a / 2) + (b / 2)`, adding `1` if $a \cong 1 \mod 2$ (which is guaranteed to fit in the
29/// result).
30///
31/// This is intended to calculate `b_1 + b_2`, as part of composition, where for a binary
32/// quadratic form $b^2 - 4ac = delta$, it is apparent how $b \cong delta \mod 2$. Therefore, for
33/// any two binary quadratic forms of the same discriminant, their `b` coefficients are congruent
34/// modulo two.
35///
36/// This assigns the absolute value of the result to the first argument. The returned value is if
37/// the result is positive. The number `0` MAY be considered positive or negative.
38///
39/// This assumes `a.1, b.1` have an equivalent amount of limbs.
40// NOTE: The composition algorithm requires `2 a` by representable in the container of limbs, so
41// we could simplify this algorithm accordingly. This algorithm is a negligible part of the
42// reduction, so being complete here is preferred.
43fn sum_assign_half_of_two_numbers_congruent_mod_two(
44  a: (Choice, &mut [Limb]),
45  b: (Choice, &[Limb]),
46) -> Choice {
47  // This is addition if they have the same sign and subtraction otherwise
48  let add = a.0.ct_eq(&b.0);
49
50  /*
51    If we are adding two odd numbers, the trailing bits would sum to `2`. As we're calculating
52    _half_ the sum, we need to add one to whatever the result is. If we are adding two even
53    numbers, the trailing bits sum to `0`, and we do not have to add anything.
54
55    Because we assume these numbers are congruent modulo two, we determine odd/even solely by
56    inspecting `a`.
57
58    Note we could minorly optimize this by restricting to odd discriminants, as we do within
59    reduction (by simply setting `carry = Limb::ONE`). The assumption in reduction is used to save
60    ~5% of the runtime, as it allows deferring when the `b` coefficient is negated. Here, checking
61    the least significant bit is trivial, so it's preferable to be complete.
62  */
63  let mut carry = a.1[0] & Limb::ONE;
64  /*
65    Because these numbers are congruent modulo 2, their trailing bits are the same and have a
66    difference of `0`, so we do not have to do anything about them.
67  */
68  let mut borrow = Limb::ZERO;
69
70  for i in 0 .. (a.1.len() - 1) {
71    let a_shr1: Limb = (a.1[i + 1] << (Limb::BITS - 1)) | (a.1[i] >> 1);
72    let b_shr1 = (b.1[i + 1] << (Limb::BITS - 1)) | (b.1[i] >> 1);
73
74    let if_add;
75    (if_add, carry) = a_shr1.carrying_add(b_shr1, carry);
76    let if_sub;
77    (if_sub, borrow) = a_shr1.borrowing_sub(b_shr1, borrow);
78
79    // Write the new limb value
80    a.1[i] = Limb::ct_select(&if_sub, &if_add, add);
81  }
82
83  {
84    let i = a.1.len() - 1;
85
86    let a_shr1: Limb = a.1[i] >> 1;
87    let b_shr1 = b.1[i] >> 1;
88
89    let (if_add, _carry) = a_shr1.carrying_add(b_shr1, carry);
90    let if_sub;
91    (if_sub, borrow) = a_shr1.borrowing_sub(b_shr1, borrow);
92
93    // Write the new limb value
94    a.1[i] = Limb::ct_select(&if_sub, &if_add, add);
95  }
96
97  let a_gte_b = borrow.is_zero();
98  // If this was a subtraction and `a < b`, negate the result
99  {
100    let a_lt_b = !a_gte_b;
101    let mut carry = Limb::from(u8::from(a_lt_b & (!add)));
102    let mask = Limb::ZERO.wrapping_sub(carry);
103    for limb in a.1 {
104      let new_limb;
105      (new_limb, carry) = ((*limb) ^ mask).carrying_add(Limb::ZERO, carry);
106      *limb = new_limb;
107    }
108  }
109
110  /*
111    `carry` MUST be zero as half the sum of two numbers has at most equal bit-length to the greater
112    of the inputs'. `borrow` MAY be non-zero if `a < b`, in which case the result has sign
113    equivalent to `b`'s. Specifically, we should output for the sign:
114
115          | Same Sign | Different Signs |
116    ------|-----------------------------|
117    a < b | a.0 = b.0 |       b.0       |
118    a > b | a.0 = b.0 |       a.0       |
119    a = b | a.0 = b.0 |        ?        |
120
121    "?" refers to how we are allowed to return either sign for `0`.
122
123    From the above table, it's clear how our choice of which of the inputs' signs is solely
124    important when the inputs have different signs, allowing us to very simply define the result.
125  */
126  Choice::ct_select(&b.0, &a.0, a_gte_b)
127}
128
129/// Calculate the difference of two signed integers.
130///
131/// This assigns the absolute value of the result to the first argument, sans the most significant
132/// bit. The returned values are if the result is positive and the most significant bit, in that
133/// order. The number `0` MAY be considered positive or negative.
134///
135/// This assumes `a.1, b.1` have an equivalent amount of limbs and that the result will fit in the
136/// same amount of limbs.
137fn diff_assign(a: (Choice, &mut [Limb]), b: (Choice, &[Limb])) -> Choice {
138  // This is subtraction if they have the same sign and addition otherwise
139  let sub = a.0.ct_eq(&b.0);
140
141  let mut carry = Limb::ZERO;
142  let mut borrow = Limb::ZERO;
143  for (a_limb, b_limb) in a.1.iter_mut().zip(b.1) {
144    let if_add;
145    (if_add, carry) = a_limb.carrying_add(*b_limb, carry);
146    let if_sub;
147    (if_sub, borrow) = a_limb.borrowing_sub(*b_limb, borrow);
148    *a_limb = Limb::ct_select(&if_add, &if_sub, sub);
149  }
150
151  let a_gte_b = borrow.is_zero();
152  // If this was a subtraction and `a < b`, negate the result
153  {
154    let a_lt_b = !a_gte_b;
155    let mut carry = Limb::from(u8::from(a_lt_b & sub));
156    let mask = Limb::ZERO.wrapping_sub(carry);
157    for limb in a.1 {
158      let new_limb;
159      (new_limb, carry) = ((*limb) ^ mask).carrying_add(Limb::ZERO, carry);
160      *limb = new_limb;
161    }
162  }
163
164  {
165    // If we performed an addition, the sign is that of the first number
166    let if_add = a.0;
167    // If we performed a subtraction, the sign is that of the larger number, but negated if it was
168    // the second number
169    let if_sub = Choice::ct_select(&!b.0, &a.0, a_gte_b);
170    Choice::ct_select(&if_add, &if_sub, sub)
171  }
172}
173
174/// The result of an extended GCD algorithm.
175pub(super) struct Xgcd<U> {
176  /// The greatest common denominator.
177  pub(super) d: U,
178  /// The `u` coefficient such that `ua + vb = d`.
179  pub(super) u: I<U>,
180  /// The `v` coefficient such that `ua + vb = d`.
181  pub(super) v: I<U>,
182}
183
184trait LimbHelpers: Sized + AsRef<[Limb]> + AsMut<[Limb]> {
185  /// Double the input.
186  ///
187  /// This assumes the result will fit within the same amount of limbs.
188  fn double(mut self) -> Self {
189    let mut carry = Limb::ZERO;
190    for limb in self.as_mut() {
191      let new_limb = ((*limb) << 1) | carry;
192      carry = (*limb) >> 63;
193      *limb = new_limb;
194    }
195    #[cfg(debug_assertions)]
196    {
197      debug_assert!(bool::from(carry.is_zero()));
198    }
199    self
200  }
201
202  /// Negate `self` modulo `modulus` if `negate == true`.
203  ///
204  /// This assumes `self` and `modulus` have the same amount of limbs and that `self <= modulus`.
205  /// This will return if the input was `0` (as an integer, not as congruent to) and the value.
206  /// The value may be the modulus (as congruent to `0`) but only if the input was `0` and `negate`
207  /// was `true`.
208  fn ct_neg_mod(mut self, modulus: &Self, negate: Choice) -> (Choice, Self) {
209    let mut is_zero = Limb::ZERO;
210    let mut borrow = Limb::ZERO;
211    for (our_limb, mod_limb) in self.as_mut().iter_mut().zip(modulus.as_ref()) {
212      // Check if the input was zero, which is relatively cheap to do when we're already iterating
213      // over this value
214      is_zero |= *our_limb;
215      let new_limb;
216      (new_limb, borrow) = mod_limb.borrowing_sub(*our_limb, borrow);
217      our_limb.ct_assign(&new_limb, negate);
218    }
219    (is_zero.is_zero(), self)
220  }
221
222  /// Subtract `b` from `self` modulo `modulus`.
223  ///
224  /// This assumes `self`, `b`, and `modulus` have the same amount of limbs and both `self` and `b`
225  /// are less than or equal to the modulus, and may return the modulus to represent `0` when asked
226  /// to calculate `modulus - 0`.
227  fn sub_mod(mut self, b: &Self, modulus: &Self) -> Self {
228    let mut borrow = Limb::ZERO;
229    for (our_limb, b_limb) in self.as_mut().iter_mut().zip(b.as_ref()) {
230      let new_limb;
231      (new_limb, borrow) = our_limb.borrowing_sub(*b_limb, borrow);
232      *our_limb = new_limb;
233    }
234
235    // Add the modulus if this underflowed
236    let underflowed = !borrow.is_zero();
237    let mut carry = Limb::ZERO;
238    for (our_limb, mod_limb) in self.as_mut().iter_mut().zip(modulus.as_ref()) {
239      let new_limb;
240      (new_limb, carry) = our_limb.carrying_add(*mod_limb, carry);
241      our_limb.ct_assign(&new_limb, underflowed);
242    }
243
244    self
245  }
246}
247impl<T: Sized + AsRef<[Limb]> + AsMut<[Limb]>> LimbHelpers for T {}
248
249#[expect(private_bounds)]
250pub(super) trait WideLimbs<Thin>: Clone + LimbHelpers {
251  /// Calculate `self % denom`.
252  ///
253  /// Callers MUST NOT pass `denom = 0`.
254  fn rem(self, denom: &Thin) -> Thin;
255}
256
257/// The required view over a collection of limbs to calculate the `c` coefficient.
258///
259/// Implementations MUST implement all functions in time constant to the value of the inputs,
260/// except for the amount of limbs, unless otherwise stated. Implementations MUST NOT panic for any
261/// input which the caller MAY pass.
262#[expect(private_bounds)]
263pub(crate) trait Limbs:
264  Clone + AsRef<[Limb]> + AsMut<[Limb]> + CtEq + CtAssign + LimbHelpers
265{
266  /// A wider container capable of storing the product of any two values representable within this
267  /// container.
268  type Wide: WideLimbs<Self>;
269
270  /// Calculate the GCD `d` of `self, other` (as `a, b`) and the coefficients such that
271  /// `ua + vb = d`.
272  ///
273  /// Callers MUST ensure the inputs have the same amount of limbs. Callers MUST NOT pass
274  /// `a = 0` or `b = 0`.
275  ///
276  /// Implementations MUST return values with the same amount of limbs as the inputs.
277  #[expect(private_interfaces)]
278  fn xgcd(self, other: Self) -> Xgcd<Self>;
279
280  /// Calculate `self / denom` where `denom | self`.
281  ///
282  /// Callers MUST ensure the inputs have the same amount of limbs. Callers MUST NOT pass
283  /// `denom = 0`.
284  ///
285  /// Implementations MUST return a value with the same amount of limbs as the numerator.
286  fn div(self, denom: &Self) -> Self;
287
288  /// Multiply two values modulo `modulus`.
289  ///
290  /// Callers MUST ensure the inputs have the same amount of limbs. Callers MUST NOT pass
291  /// `modulus = 0`.
292  ///
293  /// Implementations MUST support any factors, not just those less than the modulus.
294  /// Implementations MUST return a value with the same amount of limbs as the modulus.
295  fn mul_mod(&self, other: &Self, modulus: &Self) -> Self {
296    self.mul(other).rem(modulus)
297  }
298
299  /// Multiply two values into a wide value.
300  fn mul(&self, other: &Self) -> Self::Wide;
301
302  /// Square a value into a wide value.
303  fn square(&self) -> Self::Wide {
304    self.mul(self)
305  }
306}
307
308/// Compose two positive definite binary quadratic forms.
309///
310/// This requires boths form be well-defined, primitive, and with the same negative discriminant.
311/// Neither form is explicitly bound to be reduced however.
312///
313/// This function assumes:
314/// - `floor(log_2(a1)) + 1 < AsRef::<[Limb]>::as_ref(&a1).len() * Limb::BITS`
315/// - `floor(log_2(a2)) + 1 < AsRef::<[Limb]>::as_ref(&a2).len() * Limb::BITS`
316/// - `AsRef::<[Limb]>::as_ref(&a1).len() == AsRef::<[Limb]>::as_ref(&a2).len()`
317/// - `AsRef::<[Limb]>::as_ref(&a1).len() == AsRef::<[Limb]>::as_ref(&b2.1).len()`
318/// - `AsRef::<[Limb]>::as_ref(&b1.1).len() == AsRef::<[Limb]>::as_ref(&b2.1).len()`
319///
320/// This returns the unreduced `a', b'` coefficients of the resulting form, with the following
321/// bounds:
322/// - $a' < 2^(floor(log_2(a1 * a2)) + 1)$
323/// - $b' < 2^(1 + max(floor(log_2(|b2|)), floor(log_2(2 * a1 * a2))) + 1)$
324#[expect(clippy::needless_pass_by_value)]
325pub(crate) fn add<U: Limbs>(
326  a1: U,
327  mut b1: I<U>,
328  a2: U,
329  b2: I<U>,
330  c2: U::Wide,
331) -> (U::Wide, I<U::Wide>) {
332  /*
333    `s = (b1 + b2) / 2`
334
335    If the discriminant is odd, each `b` coefficient must be odd. If the discriminant is even, each
336    `b` coefficient must be even. Accordingly, the `b` coefficients are congruent modulo `2`.
337
338    The input is bounded such that these have an equivalent amount of limbs.
339  */
340  let s: I<U> = {
341    let sign = sum_assign_half_of_two_numbers_congruent_mod_two(
342      (b1.0, b1.1.as_mut()),
343      (b2.0, b2.1.as_ref()),
344    );
345    (sign, b1.1)
346  };
347
348  let (d1, x2, y1, y2): (U, I<U>, I<U>, I<U>) = {
349    /*
350      This corresponds to step 2 of Algorithm 5.4.7 (the first Euclidean step). The described step
351      short-circuits when `a1` is a factor of `a2`, but it's noted the general solution would work
352      as well and that is solely an optimization. It's trivial to review it and confirm the
353      short-circuit simply yields what the general solution would otherwise.
354    */
355    let (d, y1) = {
356      /*
357        `a1, a2` are non-zero for negative discriminants as with `b^2 - 4ac = -|delta|`, there
358        would be no satisfaction if `a = 0`.
359      */
360      let xgcd = a2.clone().xgcd(a1.clone());
361      (xgcd.d, xgcd.u)
362    };
363
364    /*
365      This corresponds to step 3 (the second Euclidean step), which again has a described step
366      which short-circuits when `d` is a factor of `s`. When `s` is non-zero, the general solution
367      yields an equivalent result to the short-circuit. When `s` is zero, we do need to explicitly
368      set `x_2 = 0, y_2 = -1, d_1 = d` however.
369
370      `d` is non-zero as it's a factor of `a1, a2`.
371    */
372    let s_is_zero = {
373      let mut s_is_zero = Limb::ZERO;
374      for limb in s.1.as_ref() {
375        s_is_zero |= *limb;
376      }
377      s_is_zero.is_zero()
378    };
379
380    /*
381      If `s` is zero, set it to one so the following GCD call is well-defined.
382
383      NOTE: This is probably extraneous. When one input is zero, most XGCD algorithms do still have
384      a well-defined outputs, which we can probably bound to be the output required here without
385      too many problems. At worst, the implementation provided by the underlying `Limbs`
386      implementation would have to finess it as necessary.
387
388      It's quite simple, and of negligible performance impact, to bound it here however and
389      simplify the `Limbs` API accordingly.
390    */
391    let mut s_for_gcd = s.1.clone();
392    s_for_gcd.as_mut()[0] |= Limb::from(u8::from(s_is_zero));
393    let xgcd = s_for_gcd.xgcd(d.clone());
394
395    let mut d1 = xgcd.d;
396    // `d_1 = d` when `s = 0`
397    d1.ct_assign(&d, s_is_zero);
398
399    let mut x2 = xgcd.u;
400    // If `s` was negative, correct the sign for its coefficient
401    x2.0 ^= !s.0;
402    // `x_2 = 0` when `s = 0`
403    for x2_limb in x2.1.as_mut() {
404      x2_limb.ct_assign(&Limb::ZERO, s_is_zero);
405    }
406
407    // `y_2 = -v`, hence why this negates the sign of `xgcd.v`
408    let mut y2 = (!xgcd.v.0, xgcd.v.1);
409    // `y_2 = -1` when `s = 0`
410    y2.0.ct_assign(&Choice::FALSE, s_is_zero);
411    y2.1.as_mut()[0].ct_assign(&Limb::ONE, s_is_zero);
412    for y2_limb in &mut y2.1.as_mut()[1 ..] {
413      y2_limb.ct_assign(&Limb::ZERO, s_is_zero);
414    }
415
416    (d1, x2, y1, y2)
417  };
418
419  /*
420    `n = (b_1 - b_2) / 2 = ((b_1 + b_2) / 2) - b_2 = -(b_2 - s)`
421
422    `diff_assign` assumes the result will fit within the output container. `(b1 - b2) / 2` will
423    have bit-length at most equivalent to `max(b1, b2)`, so this assumption is upheld.
424
425    We do return `b_2 - s`, not `-(b_2 - s)`, as the described algorithm says to. This is distinct
426    from the more academic description preceding it which does define `n` as we do above, but in
427    either case, the absolute value of the result is bounded as we needed.
428  */
429  let n: I<U> = {
430    let mut b2 = (b2.0, b2.1.clone());
431    let sign = diff_assign((b2.0, b2.1.as_mut()), (s.0, s.1.as_ref()));
432    (sign, b2.1)
433  };
434
435  // `d1` is a factor of `d`, the greatest common factor of `a1, a2`, and therefore non-zero
436  let v1: U = a1.div(&d1);
437  let v2: U = a2.div(&d1);
438
439  /*
440    This computes `r` via the two parts of its equation, before taking their difference, entirely
441    over the stated modulus. Note `mul_mod` is bound to be perfect, yet the `ct_neg_mod`, `sub_mod`
442    helpers may yield `0 ..= modulus`, not `0 .. modulus`.
443
444    As `modulus` is representable in `U`, this isn't an issue except at the very end as we expect
445    `r` to be `0 ..= modulus`. Accordingly, at the very end, we do ensure the accuracy of this
446    value.
447
448    While this is silly, handling the edge case the value is the modulus at the very end avoids
449    doing it at each step.
450  */
451  let r = {
452    let r1 = y1.1.mul_mod(&y2.1, &v1).mul_mod(&n.1, &v1);
453    let r1_is_negative = (!y1.0) ^ (!y2.0) ^ (!n.0);
454    let (r1_was_zero, r1) = r1.ct_neg_mod(&v1, r1_is_negative);
455    let r1_is_modulus = r1_was_zero & r1_is_negative;
456
457    let r2 = x2.1.mul_mod(&c2.rem(&v1), &v1);
458    let (r2_was_zero, r2) = r2.ct_neg_mod(&v1, !x2.0);
459    let r2_is_zero = r2_was_zero & x2.0;
460
461    let mut r_unreduced = r1.sub_mod(&r2, &v1);
462    let r_unreduced_is_modulus = r1_is_modulus & r2_is_zero;
463    for limb in r_unreduced.as_mut() {
464      limb.ct_assign(&Limb::ZERO, r_unreduced_is_modulus);
465    }
466    r_unreduced
467  };
468
469  /*
470    Because `U` can store `2 a`, `U::Wide` can store `4 a3`.
471
472    `2 v2 r < 2 a3` as, by expansion, `2 v2 (_ % v1) < 2 v2 v1`.
473
474    We then just have to prove `b2 < 2 a3`, which is trivial as `2 a3` is bounded to
475    `U::Wide::BITS - 1` and `b2` is bounded to `U::BITS`, where
476    `U::Wide::BITS >= (2 * U::BITS) - 1` (due to the requirement `U::Wide` is able to store any
477    product of any values representable in `U`).
478
479    This completes the proof `b3` is representable in `U::Wide` when `b2 >= 0`. When `b2 < 0`, the
480    proof is trivial as `2 v2 r >= 0`, so the absolute value will be `<= max(|b2|, 2 v2 r)`.
481  */
482  let mut b3 = v2.mul(&r).double();
483  let b3_is_negative = {
484    let mut carry = Limb::ZERO;
485    let mut borrow = Limb::ZERO;
486    let mut b3_limbs = b3.as_mut().iter_mut();
487    for (b3_limb, b2_limb) in
488      (&mut b3_limbs).zip(b2.1.as_ref().iter().chain(core::iter::repeat(&Limb::ZERO)))
489    {
490      let if_add;
491      (if_add, carry) = b3_limb.carrying_add(*b2_limb, carry);
492      let if_sub;
493      (if_sub, borrow) = b3_limb.borrowing_sub(*b2_limb, borrow);
494
495      *b3_limb = Limb::ct_select(&if_sub, &if_add, b2.0);
496    }
497
498    // Clear the borrow if this was an addition, not a subtraction
499    borrow.ct_assign(&Limb::ZERO, b2.0);
500
501    // If the underflowed, negate `b3`
502    let underflowed = !borrow.is_zero();
503    let mut carry = Limb::from(u8::from(underflowed));
504    let mask = Limb::ZERO.wrapping_sub(carry);
505    for b3_limb in b3.as_mut() {
506      let new_limb;
507      (new_limb, carry) = ((*b3_limb) ^ mask).carrying_add(Limb::ZERO, carry);
508      *b3_limb = new_limb;
509    }
510
511    /*
512      This value is set to `b3_is_negative`. The resulting `b` will NOT be `-0` as if `b2 + 2 v2 r`
513      underflowed, the absolute value of the result is non-zero (as `2 v2 r - |b2|` would only
514      underflow if `|b2| > 2 v2 r`).
515    */
516    underflowed
517  };
518
519  let a3: U::Wide = v1.mul(&v2);
520
521  (a3, (!b3_is_negative, b3))
522}
523
524/// Compose a positive definite binary quadratic form with itself.
525///
526/// This requires the form be well-defined, primitive, and with negative _odd_ discriminant. The
527/// form is not bound to be reduced however.
528///
529/// This function assumes:
530/// - `floor(log_2(a)) + 1 < AsRef::<[Limb]>::as_ref(&a).len() * Limb::BITS`
531/// - `AsRef::<[Limb]>::as_ref(&a).len() == AsRef::<[Limb]>::as_ref(&b.1).len()`
532///
533/// This returns the unreduced `a', b'` coefficients of the resulting form, with the following
534/// bounds:
535/// - $a' < 2^(floor(log_2(a^2)) + 1)$
536/// - $b' < 2^(1 + max(floor(log_2(|b|)), floor(log_2(2 * a^2))) + 1)$
537//
538// This is the above `add` function, specialized for the case the forms are the same. Comments
539// which would be duplicated between the two functions are omitted.
540#[expect(clippy::needless_pass_by_value)]
541pub(crate) fn double<U: Limbs>(a: U, b: I<U>, c: U::Wide) -> (U::Wide, I<U::Wide>) {
542  // Because we bound that `delta` is _odd_, we know `s` is non-zero, as $b \cong delta \mod 2$
543  let s = b.clone();
544  let (d1, x2): (U, I<U>) = {
545    // `d = a1, y1 = 0`
546    let d = a.clone();
547
548    let xgcd = s.1.xgcd(d);
549
550    let d1 = xgcd.d;
551    let mut x2 = xgcd.u;
552    x2.0 ^= !s.0;
553
554    (d1, x2)
555  };
556
557  let v1: U = a.div(&d1);
558
559  let r = {
560    // `r1 = 0` as `y1 = 0` (and as `n = 0`)
561
562    let r2 = x2.1.mul_mod(&c.rem(&v1), &v1);
563    let (r2_was_zero, mut r2) = r2.ct_neg_mod(&v1, !x2.0);
564    let r2_is_zero = r2_was_zero & x2.0;
565
566    // `r = 0 - r2 = -r2`
567    let mut borrow = Limb::ZERO;
568    for (mod_limb, r2_limb) in v1.as_ref().iter().zip(r2.as_mut()) {
569      let new_limb;
570      (new_limb, borrow) = mod_limb.borrowing_sub(*r2_limb, borrow);
571      *r2_limb = <_>::ct_select(&new_limb, &Limb::ZERO, r2_is_zero);
572    }
573    r2
574  };
575
576  let mut b3 = v1.mul(&r).double();
577  let b3_is_negative = {
578    let mut carry = Limb::ZERO;
579    let mut borrow = Limb::ZERO;
580    let mut b3_limbs = b3.as_mut().iter_mut();
581    for (b3_limb, b_limb) in
582      (&mut b3_limbs).zip(b.1.as_ref().iter().chain(core::iter::repeat(&Limb::ZERO)))
583    {
584      let if_add;
585      (if_add, carry) = b3_limb.carrying_add(*b_limb, carry);
586      let if_sub;
587      (if_sub, borrow) = b3_limb.borrowing_sub(*b_limb, borrow);
588
589      *b3_limb = Limb::ct_select(&if_sub, &if_add, b.0);
590    }
591
592    borrow.ct_assign(&Limb::ZERO, b.0);
593
594    let underflowed = !borrow.is_zero();
595    let mut carry = Limb::from(u8::from(underflowed));
596    let mask = Limb::ZERO.wrapping_sub(carry);
597    for b3_limb in b3.as_mut() {
598      let new_limb;
599      (new_limb, carry) = ((*b3_limb) ^ mask).carrying_add(Limb::ZERO, carry);
600      *b3_limb = new_limb;
601    }
602
603    underflowed
604  };
605
606  let a3: U::Wide = v1.square();
607
608  (a3, (!b3_is_negative, b3))
609}