class_groups/crypto_bigint/composition.rs
1//! Composition of binary quadratic forms in constant time.
2//!
3//! This is generic to the underlying container representing the numbers, allowing usage with both
4//! allocating and non-allocating backends. However, the module as a whole is in a weird place.
5//! Specifically, it optimizes some functions, and leverages incompleteness elsewhere (tailored to
6//! the specific special cases encountered during reduction), but the optimized functions are _not_
7//! a majority of the runtime (or even a notable part). Instead, the GCD, divisions are, and those
8//! are completely deferred. Additionally, despite being generic to the underlying containers and
9//! not explicitly allocating, this code does make frequent use of `clone` (presumably copies for
10//! elements represented on the stack) due to requiring a non-trivial amount of scratch variables.
11//!
12//! The important aspect of this code is that it's clearly bounded as necessary to verify the
13//! correctness of the implemented algortithm. Despite this, the implemented algorithm is not
14//! itself proven here, it being Algorithm 5.4.7 Composition of Positive Definite Forms from
15//! A Course in Computational Algebraic Number Theory by Henri Cohen (as commonly implemented in
16//! its optimized variant as "NUCOMP"). Per a remark, and also Chapter 5, Exercise 9, the composite
17//! of two primitive forms is itself primitive.
18
19use crypto_bigint::{CtEq, CtSelect as _, CtAssign, Choice, Limb};
20
21use super::I;
22
23/// Calculate half the sum of two signed integers which are congruent modulo two.
24///
25/// This is equivalent to `(a + b) / 2`, which only has an integer solution when
26/// $a \cong b \mod 2$. A wrapping implementation of `a + b`, then divided by two, may be incorrect
27/// (due to truncating the highest bit). This ensures a correct solution via the methodology of
28/// `(a / 2) + (b / 2)`, adding `1` if $a \cong 1 \mod 2$ (which is guaranteed to fit in the
29/// result).
30///
31/// This is intended to calculate `b_1 + b_2`, as part of composition, where for a binary
32/// quadratic form $b^2 - 4ac = delta$, it is apparent how $b \cong delta \mod 2$. Therefore, for
33/// any two binary quadratic forms of the same discriminant, their `b` coefficients are congruent
34/// modulo two.
35///
36/// This assigns the absolute value of the result to the first argument. The returned value is if
37/// the result is positive. The number `0` MAY be considered positive or negative.
38///
39/// This assumes `a.1, b.1` have an equivalent amount of limbs.
40// NOTE: The composition algorithm requires `2 a` by representable in the container of limbs, so
41// we could simplify this algorithm accordingly. This algorithm is a negligible part of the
42// reduction, so being complete here is preferred.
43fn sum_assign_half_of_two_numbers_congruent_mod_two(
44 a: (Choice, &mut [Limb]),
45 b: (Choice, &[Limb]),
46) -> Choice {
47 // This is addition if they have the same sign and subtraction otherwise
48 let add = a.0.ct_eq(&b.0);
49
50 /*
51 If we are adding two odd numbers, the trailing bits would sum to `2`. As we're calculating
52 _half_ the sum, we need to add one to whatever the result is. If we are adding two even
53 numbers, the trailing bits sum to `0`, and we do not have to add anything.
54
55 Because we assume these numbers are congruent modulo two, we determine odd/even solely by
56 inspecting `a`.
57
58 Note we could minorly optimize this by restricting to odd discriminants, as we do within
59 reduction (by simply setting `carry = Limb::ONE`). The assumption in reduction is used to save
60 ~5% of the runtime, as it allows deferring when the `b` coefficient is negated. Here, checking
61 the least significant bit is trivial, so it's preferable to be complete.
62 */
63 let mut carry = a.1[0] & Limb::ONE;
64 /*
65 Because these numbers are congruent modulo 2, their trailing bits are the same and have a
66 difference of `0`, so we do not have to do anything about them.
67 */
68 let mut borrow = Limb::ZERO;
69
70 for i in 0 .. (a.1.len() - 1) {
71 let a_shr1: Limb = (a.1[i + 1] << (Limb::BITS - 1)) | (a.1[i] >> 1);
72 let b_shr1 = (b.1[i + 1] << (Limb::BITS - 1)) | (b.1[i] >> 1);
73
74 let if_add;
75 (if_add, carry) = a_shr1.carrying_add(b_shr1, carry);
76 let if_sub;
77 (if_sub, borrow) = a_shr1.borrowing_sub(b_shr1, borrow);
78
79 // Write the new limb value
80 a.1[i] = Limb::ct_select(&if_sub, &if_add, add);
81 }
82
83 {
84 let i = a.1.len() - 1;
85
86 let a_shr1: Limb = a.1[i] >> 1;
87 let b_shr1 = b.1[i] >> 1;
88
89 let (if_add, _carry) = a_shr1.carrying_add(b_shr1, carry);
90 let if_sub;
91 (if_sub, borrow) = a_shr1.borrowing_sub(b_shr1, borrow);
92
93 // Write the new limb value
94 a.1[i] = Limb::ct_select(&if_sub, &if_add, add);
95 }
96
97 let a_gte_b = borrow.is_zero();
98 // If this was a subtraction and `a < b`, negate the result
99 {
100 let a_lt_b = !a_gte_b;
101 let mut carry = Limb::from(u8::from(a_lt_b & (!add)));
102 let mask = Limb::ZERO.wrapping_sub(carry);
103 for limb in a.1 {
104 let new_limb;
105 (new_limb, carry) = ((*limb) ^ mask).carrying_add(Limb::ZERO, carry);
106 *limb = new_limb;
107 }
108 }
109
110 /*
111 `carry` MUST be zero as half the sum of two numbers has at most equal bit-length to the greater
112 of the inputs'. `borrow` MAY be non-zero if `a < b`, in which case the result has sign
113 equivalent to `b`'s. Specifically, we should output for the sign:
114
115 | Same Sign | Different Signs |
116 ------|-----------------------------|
117 a < b | a.0 = b.0 | b.0 |
118 a > b | a.0 = b.0 | a.0 |
119 a = b | a.0 = b.0 | ? |
120
121 "?" refers to how we are allowed to return either sign for `0`.
122
123 From the above table, it's clear how our choice of which of the inputs' signs is solely
124 important when the inputs have different signs, allowing us to very simply define the result.
125 */
126 Choice::ct_select(&b.0, &a.0, a_gte_b)
127}
128
129/// Calculate the difference of two signed integers.
130///
131/// This assigns the absolute value of the result to the first argument, sans the most significant
132/// bit. The returned values are if the result is positive and the most significant bit, in that
133/// order. The number `0` MAY be considered positive or negative.
134///
135/// This assumes `a.1, b.1` have an equivalent amount of limbs and that the result will fit in the
136/// same amount of limbs.
137fn diff_assign(a: (Choice, &mut [Limb]), b: (Choice, &[Limb])) -> Choice {
138 // This is subtraction if they have the same sign and addition otherwise
139 let sub = a.0.ct_eq(&b.0);
140
141 let mut carry = Limb::ZERO;
142 let mut borrow = Limb::ZERO;
143 for (a_limb, b_limb) in a.1.iter_mut().zip(b.1) {
144 let if_add;
145 (if_add, carry) = a_limb.carrying_add(*b_limb, carry);
146 let if_sub;
147 (if_sub, borrow) = a_limb.borrowing_sub(*b_limb, borrow);
148 *a_limb = Limb::ct_select(&if_add, &if_sub, sub);
149 }
150
151 let a_gte_b = borrow.is_zero();
152 // If this was a subtraction and `a < b`, negate the result
153 {
154 let a_lt_b = !a_gte_b;
155 let mut carry = Limb::from(u8::from(a_lt_b & sub));
156 let mask = Limb::ZERO.wrapping_sub(carry);
157 for limb in a.1 {
158 let new_limb;
159 (new_limb, carry) = ((*limb) ^ mask).carrying_add(Limb::ZERO, carry);
160 *limb = new_limb;
161 }
162 }
163
164 {
165 // If we performed an addition, the sign is that of the first number
166 let if_add = a.0;
167 // If we performed a subtraction, the sign is that of the larger number, but negated if it was
168 // the second number
169 let if_sub = Choice::ct_select(&!b.0, &a.0, a_gte_b);
170 Choice::ct_select(&if_add, &if_sub, sub)
171 }
172}
173
174/// The result of an extended GCD algorithm.
175pub(super) struct Xgcd<U> {
176 /// The greatest common denominator.
177 pub(super) d: U,
178 /// The `u` coefficient such that `ua + vb = d`.
179 pub(super) u: I<U>,
180 /// The `v` coefficient such that `ua + vb = d`.
181 pub(super) v: I<U>,
182}
183
184trait LimbHelpers: Sized + AsRef<[Limb]> + AsMut<[Limb]> {
185 /// Double the input.
186 ///
187 /// This assumes the result will fit within the same amount of limbs.
188 fn double(mut self) -> Self {
189 let mut carry = Limb::ZERO;
190 for limb in self.as_mut() {
191 let new_limb = ((*limb) << 1) | carry;
192 carry = (*limb) >> 63;
193 *limb = new_limb;
194 }
195 #[cfg(debug_assertions)]
196 {
197 debug_assert!(bool::from(carry.is_zero()));
198 }
199 self
200 }
201
202 /// Negate `self` modulo `modulus` if `negate == true`.
203 ///
204 /// This assumes `self` and `modulus` have the same amount of limbs and that `self <= modulus`.
205 /// This will return if the input was `0` (as an integer, not as congruent to) and the value.
206 /// The value may be the modulus (as congruent to `0`) but only if the input was `0` and `negate`
207 /// was `true`.
208 fn ct_neg_mod(mut self, modulus: &Self, negate: Choice) -> (Choice, Self) {
209 let mut is_zero = Limb::ZERO;
210 let mut borrow = Limb::ZERO;
211 for (our_limb, mod_limb) in self.as_mut().iter_mut().zip(modulus.as_ref()) {
212 // Check if the input was zero, which is relatively cheap to do when we're already iterating
213 // over this value
214 is_zero |= *our_limb;
215 let new_limb;
216 (new_limb, borrow) = mod_limb.borrowing_sub(*our_limb, borrow);
217 our_limb.ct_assign(&new_limb, negate);
218 }
219 (is_zero.is_zero(), self)
220 }
221
222 /// Subtract `b` from `self` modulo `modulus`.
223 ///
224 /// This assumes `self`, `b`, and `modulus` have the same amount of limbs and both `self` and `b`
225 /// are less than or equal to the modulus, and may return the modulus to represent `0` when asked
226 /// to calculate `modulus - 0`.
227 fn sub_mod(mut self, b: &Self, modulus: &Self) -> Self {
228 let mut borrow = Limb::ZERO;
229 for (our_limb, b_limb) in self.as_mut().iter_mut().zip(b.as_ref()) {
230 let new_limb;
231 (new_limb, borrow) = our_limb.borrowing_sub(*b_limb, borrow);
232 *our_limb = new_limb;
233 }
234
235 // Add the modulus if this underflowed
236 let underflowed = !borrow.is_zero();
237 let mut carry = Limb::ZERO;
238 for (our_limb, mod_limb) in self.as_mut().iter_mut().zip(modulus.as_ref()) {
239 let new_limb;
240 (new_limb, carry) = our_limb.carrying_add(*mod_limb, carry);
241 our_limb.ct_assign(&new_limb, underflowed);
242 }
243
244 self
245 }
246}
247impl<T: Sized + AsRef<[Limb]> + AsMut<[Limb]>> LimbHelpers for T {}
248
249#[expect(private_bounds)]
250pub(super) trait WideLimbs<Thin>: Clone + LimbHelpers {
251 /// Calculate `self % denom`.
252 ///
253 /// Callers MUST NOT pass `denom = 0`.
254 fn rem(self, denom: &Thin) -> Thin;
255}
256
257/// The required view over a collection of limbs to calculate the `c` coefficient.
258///
259/// Implementations MUST implement all functions in time constant to the value of the inputs,
260/// except for the amount of limbs, unless otherwise stated. Implementations MUST NOT panic for any
261/// input which the caller MAY pass.
262#[expect(private_bounds)]
263pub(crate) trait Limbs:
264 Clone + AsRef<[Limb]> + AsMut<[Limb]> + CtEq + CtAssign + LimbHelpers
265{
266 /// A wider container capable of storing the product of any two values representable within this
267 /// container.
268 type Wide: WideLimbs<Self>;
269
270 /// Calculate the GCD `d` of `self, other` (as `a, b`) and the coefficients such that
271 /// `ua + vb = d`.
272 ///
273 /// Callers MUST ensure the inputs have the same amount of limbs. Callers MUST NOT pass
274 /// `a = 0` or `b = 0`.
275 ///
276 /// Implementations MUST return values with the same amount of limbs as the inputs.
277 #[expect(private_interfaces)]
278 fn xgcd(self, other: Self) -> Xgcd<Self>;
279
280 /// Calculate `self / denom` where `denom | self`.
281 ///
282 /// Callers MUST ensure the inputs have the same amount of limbs. Callers MUST NOT pass
283 /// `denom = 0`.
284 ///
285 /// Implementations MUST return a value with the same amount of limbs as the numerator.
286 fn div(self, denom: &Self) -> Self;
287
288 /// Multiply two values modulo `modulus`.
289 ///
290 /// Callers MUST ensure the inputs have the same amount of limbs. Callers MUST NOT pass
291 /// `modulus = 0`.
292 ///
293 /// Implementations MUST support any factors, not just those less than the modulus.
294 /// Implementations MUST return a value with the same amount of limbs as the modulus.
295 fn mul_mod(&self, other: &Self, modulus: &Self) -> Self {
296 self.mul(other).rem(modulus)
297 }
298
299 /// Multiply two values into a wide value.
300 fn mul(&self, other: &Self) -> Self::Wide;
301
302 /// Square a value into a wide value.
303 fn square(&self) -> Self::Wide {
304 self.mul(self)
305 }
306}
307
308/// Compose two positive definite binary quadratic forms.
309///
310/// This requires boths form be well-defined, primitive, and with the same negative discriminant.
311/// Neither form is explicitly bound to be reduced however.
312///
313/// This function assumes:
314/// - `floor(log_2(a1)) + 1 < AsRef::<[Limb]>::as_ref(&a1).len() * Limb::BITS`
315/// - `floor(log_2(a2)) + 1 < AsRef::<[Limb]>::as_ref(&a2).len() * Limb::BITS`
316/// - `AsRef::<[Limb]>::as_ref(&a1).len() == AsRef::<[Limb]>::as_ref(&a2).len()`
317/// - `AsRef::<[Limb]>::as_ref(&a1).len() == AsRef::<[Limb]>::as_ref(&b2.1).len()`
318/// - `AsRef::<[Limb]>::as_ref(&b1.1).len() == AsRef::<[Limb]>::as_ref(&b2.1).len()`
319///
320/// This returns the unreduced `a', b'` coefficients of the resulting form, with the following
321/// bounds:
322/// - $a' < 2^(floor(log_2(a1 * a2)) + 1)$
323/// - $b' < 2^(1 + max(floor(log_2(|b2|)), floor(log_2(2 * a1 * a2))) + 1)$
324#[expect(clippy::needless_pass_by_value)]
325pub(crate) fn add<U: Limbs>(
326 a1: U,
327 mut b1: I<U>,
328 a2: U,
329 b2: I<U>,
330 c2: U::Wide,
331) -> (U::Wide, I<U::Wide>) {
332 /*
333 `s = (b1 + b2) / 2`
334
335 If the discriminant is odd, each `b` coefficient must be odd. If the discriminant is even, each
336 `b` coefficient must be even. Accordingly, the `b` coefficients are congruent modulo `2`.
337
338 The input is bounded such that these have an equivalent amount of limbs.
339 */
340 let s: I<U> = {
341 let sign = sum_assign_half_of_two_numbers_congruent_mod_two(
342 (b1.0, b1.1.as_mut()),
343 (b2.0, b2.1.as_ref()),
344 );
345 (sign, b1.1)
346 };
347
348 let (d1, x2, y1, y2): (U, I<U>, I<U>, I<U>) = {
349 /*
350 This corresponds to step 2 of Algorithm 5.4.7 (the first Euclidean step). The described step
351 short-circuits when `a1` is a factor of `a2`, but it's noted the general solution would work
352 as well and that is solely an optimization. It's trivial to review it and confirm the
353 short-circuit simply yields what the general solution would otherwise.
354 */
355 let (d, y1) = {
356 /*
357 `a1, a2` are non-zero for negative discriminants as with `b^2 - 4ac = -|delta|`, there
358 would be no satisfaction if `a = 0`.
359 */
360 let xgcd = a2.clone().xgcd(a1.clone());
361 (xgcd.d, xgcd.u)
362 };
363
364 /*
365 This corresponds to step 3 (the second Euclidean step), which again has a described step
366 which short-circuits when `d` is a factor of `s`. When `s` is non-zero, the general solution
367 yields an equivalent result to the short-circuit. When `s` is zero, we do need to explicitly
368 set `x_2 = 0, y_2 = -1, d_1 = d` however.
369
370 `d` is non-zero as it's a factor of `a1, a2`.
371 */
372 let s_is_zero = {
373 let mut s_is_zero = Limb::ZERO;
374 for limb in s.1.as_ref() {
375 s_is_zero |= *limb;
376 }
377 s_is_zero.is_zero()
378 };
379
380 /*
381 If `s` is zero, set it to one so the following GCD call is well-defined.
382
383 NOTE: This is probably extraneous. When one input is zero, most XGCD algorithms do still have
384 a well-defined outputs, which we can probably bound to be the output required here without
385 too many problems. At worst, the implementation provided by the underlying `Limbs`
386 implementation would have to finess it as necessary.
387
388 It's quite simple, and of negligible performance impact, to bound it here however and
389 simplify the `Limbs` API accordingly.
390 */
391 let mut s_for_gcd = s.1.clone();
392 s_for_gcd.as_mut()[0] |= Limb::from(u8::from(s_is_zero));
393 let xgcd = s_for_gcd.xgcd(d.clone());
394
395 let mut d1 = xgcd.d;
396 // `d_1 = d` when `s = 0`
397 d1.ct_assign(&d, s_is_zero);
398
399 let mut x2 = xgcd.u;
400 // If `s` was negative, correct the sign for its coefficient
401 x2.0 ^= !s.0;
402 // `x_2 = 0` when `s = 0`
403 for x2_limb in x2.1.as_mut() {
404 x2_limb.ct_assign(&Limb::ZERO, s_is_zero);
405 }
406
407 // `y_2 = -v`, hence why this negates the sign of `xgcd.v`
408 let mut y2 = (!xgcd.v.0, xgcd.v.1);
409 // `y_2 = -1` when `s = 0`
410 y2.0.ct_assign(&Choice::FALSE, s_is_zero);
411 y2.1.as_mut()[0].ct_assign(&Limb::ONE, s_is_zero);
412 for y2_limb in &mut y2.1.as_mut()[1 ..] {
413 y2_limb.ct_assign(&Limb::ZERO, s_is_zero);
414 }
415
416 (d1, x2, y1, y2)
417 };
418
419 /*
420 `n = (b_1 - b_2) / 2 = ((b_1 + b_2) / 2) - b_2 = -(b_2 - s)`
421
422 `diff_assign` assumes the result will fit within the output container. `(b1 - b2) / 2` will
423 have bit-length at most equivalent to `max(b1, b2)`, so this assumption is upheld.
424
425 We do return `b_2 - s`, not `-(b_2 - s)`, as the described algorithm says to. This is distinct
426 from the more academic description preceding it which does define `n` as we do above, but in
427 either case, the absolute value of the result is bounded as we needed.
428 */
429 let n: I<U> = {
430 let mut b2 = (b2.0, b2.1.clone());
431 let sign = diff_assign((b2.0, b2.1.as_mut()), (s.0, s.1.as_ref()));
432 (sign, b2.1)
433 };
434
435 // `d1` is a factor of `d`, the greatest common factor of `a1, a2`, and therefore non-zero
436 let v1: U = a1.div(&d1);
437 let v2: U = a2.div(&d1);
438
439 /*
440 This computes `r` via the two parts of its equation, before taking their difference, entirely
441 over the stated modulus. Note `mul_mod` is bound to be perfect, yet the `ct_neg_mod`, `sub_mod`
442 helpers may yield `0 ..= modulus`, not `0 .. modulus`.
443
444 As `modulus` is representable in `U`, this isn't an issue except at the very end as we expect
445 `r` to be `0 ..= modulus`. Accordingly, at the very end, we do ensure the accuracy of this
446 value.
447
448 While this is silly, handling the edge case the value is the modulus at the very end avoids
449 doing it at each step.
450 */
451 let r = {
452 let r1 = y1.1.mul_mod(&y2.1, &v1).mul_mod(&n.1, &v1);
453 let r1_is_negative = (!y1.0) ^ (!y2.0) ^ (!n.0);
454 let (r1_was_zero, r1) = r1.ct_neg_mod(&v1, r1_is_negative);
455 let r1_is_modulus = r1_was_zero & r1_is_negative;
456
457 let r2 = x2.1.mul_mod(&c2.rem(&v1), &v1);
458 let (r2_was_zero, r2) = r2.ct_neg_mod(&v1, !x2.0);
459 let r2_is_zero = r2_was_zero & x2.0;
460
461 let mut r_unreduced = r1.sub_mod(&r2, &v1);
462 let r_unreduced_is_modulus = r1_is_modulus & r2_is_zero;
463 for limb in r_unreduced.as_mut() {
464 limb.ct_assign(&Limb::ZERO, r_unreduced_is_modulus);
465 }
466 r_unreduced
467 };
468
469 /*
470 Because `U` can store `2 a`, `U::Wide` can store `4 a3`.
471
472 `2 v2 r < 2 a3` as, by expansion, `2 v2 (_ % v1) < 2 v2 v1`.
473
474 We then just have to prove `b2 < 2 a3`, which is trivial as `2 a3` is bounded to
475 `U::Wide::BITS - 1` and `b2` is bounded to `U::BITS`, where
476 `U::Wide::BITS >= (2 * U::BITS) - 1` (due to the requirement `U::Wide` is able to store any
477 product of any values representable in `U`).
478
479 This completes the proof `b3` is representable in `U::Wide` when `b2 >= 0`. When `b2 < 0`, the
480 proof is trivial as `2 v2 r >= 0`, so the absolute value will be `<= max(|b2|, 2 v2 r)`.
481 */
482 let mut b3 = v2.mul(&r).double();
483 let b3_is_negative = {
484 let mut carry = Limb::ZERO;
485 let mut borrow = Limb::ZERO;
486 let mut b3_limbs = b3.as_mut().iter_mut();
487 for (b3_limb, b2_limb) in
488 (&mut b3_limbs).zip(b2.1.as_ref().iter().chain(core::iter::repeat(&Limb::ZERO)))
489 {
490 let if_add;
491 (if_add, carry) = b3_limb.carrying_add(*b2_limb, carry);
492 let if_sub;
493 (if_sub, borrow) = b3_limb.borrowing_sub(*b2_limb, borrow);
494
495 *b3_limb = Limb::ct_select(&if_sub, &if_add, b2.0);
496 }
497
498 // Clear the borrow if this was an addition, not a subtraction
499 borrow.ct_assign(&Limb::ZERO, b2.0);
500
501 // If the underflowed, negate `b3`
502 let underflowed = !borrow.is_zero();
503 let mut carry = Limb::from(u8::from(underflowed));
504 let mask = Limb::ZERO.wrapping_sub(carry);
505 for b3_limb in b3.as_mut() {
506 let new_limb;
507 (new_limb, carry) = ((*b3_limb) ^ mask).carrying_add(Limb::ZERO, carry);
508 *b3_limb = new_limb;
509 }
510
511 /*
512 This value is set to `b3_is_negative`. The resulting `b` will NOT be `-0` as if `b2 + 2 v2 r`
513 underflowed, the absolute value of the result is non-zero (as `2 v2 r - |b2|` would only
514 underflow if `|b2| > 2 v2 r`).
515 */
516 underflowed
517 };
518
519 let a3: U::Wide = v1.mul(&v2);
520
521 (a3, (!b3_is_negative, b3))
522}
523
524/// Compose a positive definite binary quadratic form with itself.
525///
526/// This requires the form be well-defined, primitive, and with negative _odd_ discriminant. The
527/// form is not bound to be reduced however.
528///
529/// This function assumes:
530/// - `floor(log_2(a)) + 1 < AsRef::<[Limb]>::as_ref(&a).len() * Limb::BITS`
531/// - `AsRef::<[Limb]>::as_ref(&a).len() == AsRef::<[Limb]>::as_ref(&b.1).len()`
532///
533/// This returns the unreduced `a', b'` coefficients of the resulting form, with the following
534/// bounds:
535/// - $a' < 2^(floor(log_2(a^2)) + 1)$
536/// - $b' < 2^(1 + max(floor(log_2(|b|)), floor(log_2(2 * a^2))) + 1)$
537//
538// This is the above `add` function, specialized for the case the forms are the same. Comments
539// which would be duplicated between the two functions are omitted.
540#[expect(clippy::needless_pass_by_value)]
541pub(crate) fn double<U: Limbs>(a: U, b: I<U>, c: U::Wide) -> (U::Wide, I<U::Wide>) {
542 // Because we bound that `delta` is _odd_, we know `s` is non-zero, as $b \cong delta \mod 2$
543 let s = b.clone();
544 let (d1, x2): (U, I<U>) = {
545 // `d = a1, y1 = 0`
546 let d = a.clone();
547
548 let xgcd = s.1.xgcd(d);
549
550 let d1 = xgcd.d;
551 let mut x2 = xgcd.u;
552 x2.0 ^= !s.0;
553
554 (d1, x2)
555 };
556
557 let v1: U = a.div(&d1);
558
559 let r = {
560 // `r1 = 0` as `y1 = 0` (and as `n = 0`)
561
562 let r2 = x2.1.mul_mod(&c.rem(&v1), &v1);
563 let (r2_was_zero, mut r2) = r2.ct_neg_mod(&v1, !x2.0);
564 let r2_is_zero = r2_was_zero & x2.0;
565
566 // `r = 0 - r2 = -r2`
567 let mut borrow = Limb::ZERO;
568 for (mod_limb, r2_limb) in v1.as_ref().iter().zip(r2.as_mut()) {
569 let new_limb;
570 (new_limb, borrow) = mod_limb.borrowing_sub(*r2_limb, borrow);
571 *r2_limb = <_>::ct_select(&new_limb, &Limb::ZERO, r2_is_zero);
572 }
573 r2
574 };
575
576 let mut b3 = v1.mul(&r).double();
577 let b3_is_negative = {
578 let mut carry = Limb::ZERO;
579 let mut borrow = Limb::ZERO;
580 let mut b3_limbs = b3.as_mut().iter_mut();
581 for (b3_limb, b_limb) in
582 (&mut b3_limbs).zip(b.1.as_ref().iter().chain(core::iter::repeat(&Limb::ZERO)))
583 {
584 let if_add;
585 (if_add, carry) = b3_limb.carrying_add(*b_limb, carry);
586 let if_sub;
587 (if_sub, borrow) = b3_limb.borrowing_sub(*b_limb, borrow);
588
589 *b3_limb = Limb::ct_select(&if_sub, &if_add, b.0);
590 }
591
592 borrow.ct_assign(&Limb::ZERO, b.0);
593
594 let underflowed = !borrow.is_zero();
595 let mut carry = Limb::from(u8::from(underflowed));
596 let mask = Limb::ZERO.wrapping_sub(carry);
597 for b3_limb in b3.as_mut() {
598 let new_limb;
599 (new_limb, carry) = ((*b3_limb) ^ mask).carrying_add(Limb::ZERO, carry);
600 *b3_limb = new_limb;
601 }
602
603 underflowed
604 };
605
606 let a3: U::Wide = v1.square();
607
608 (a3, (!b3_is_negative, b3))
609}