1#[inline]
32const fn mul_128(a: u128, b: u128) -> (u128, u128) {
33 let (a_hi, a_lo) = (a >> 64, a & u64::MAX as u128);
34 let (b_hi, b_lo) = (b >> 64, b & u64::MAX as u128);
35 let (mid, carry1) = (a_lo * b_hi).overflowing_add(a_hi * b_lo);
36 let (low, carry2) = (a_lo * b_lo).overflowing_add(mid << 64);
37 let high = a_hi * b_hi + (mid >> 64) + ((carry1 as u128) << 64) + carry2 as u128;
38 (high, low)
39}
40
41#[inline]
43pub(crate) const fn limbs_is_zero(a: &[u128]) -> bool {
44 let mut i = 0;
45 while i < a.len() {
46 if a[i] != 0 {
47 return false;
48 }
49 i += 1;
50 }
51 true
52}
53
54#[inline]
56pub(crate) const fn limbs_eq(a: &[u128], b: &[u128]) -> bool {
57 let n = if a.len() > b.len() { a.len() } else { b.len() };
58 let mut i = 0;
59 while i < n {
60 let av = if i < a.len() { a[i] } else { 0 };
61 let bv = if i < b.len() { b[i] } else { 0 };
62 if av != bv {
63 return false;
64 }
65 i += 1;
66 }
67 true
68}
69
70#[inline]
73pub(crate) const fn limbs_cmp(a: &[u128], b: &[u128]) -> i32 {
74 let n = if a.len() > b.len() { a.len() } else { b.len() };
75 let mut i = n;
76 while i > 0 {
77 i -= 1;
78 let av = if i < a.len() { a[i] } else { 0 };
79 let bv = if i < b.len() { b[i] } else { 0 };
80 if av < bv {
81 return -1;
82 }
83 if av > bv {
84 return 1;
85 }
86 }
87 0
88}
89
90#[inline]
92pub(crate) const fn limbs_bit_len(a: &[u128]) -> u32 {
93 let mut i = a.len();
94 while i > 0 {
95 i -= 1;
96 if a[i] != 0 {
97 return (i as u32) * 128 + (128 - a[i].leading_zeros());
98 }
99 }
100 0
101}
102
103#[inline]
105pub(crate) const fn limbs_add_assign(a: &mut [u128], b: &[u128]) -> bool {
106 let mut carry = 0u128;
107 let mut i = 0;
108 while i < a.len() {
109 let bv = if i < b.len() { b[i] } else { 0 };
110 let (s1, c1) = a[i].overflowing_add(bv);
111 let (s2, c2) = s1.overflowing_add(carry);
112 a[i] = s2;
113 carry = (c1 as u128) + (c2 as u128);
114 i += 1;
115 }
116 carry != 0
117}
118
119#[inline]
121pub(crate) const fn limbs_sub_assign(a: &mut [u128], b: &[u128]) -> bool {
122 let mut borrow = 0u128;
123 let mut i = 0;
124 while i < a.len() {
125 let bv = if i < b.len() { b[i] } else { 0 };
126 let (d1, b1) = a[i].overflowing_sub(bv);
127 let (d2, b2) = d1.overflowing_sub(borrow);
128 a[i] = d2;
129 borrow = (b1 as u128) + (b2 as u128);
130 i += 1;
131 }
132 borrow != 0
133}
134
135pub(crate) const fn limbs_shl(a: &[u128], shift: u32, out: &mut [u128]) {
138 let mut z = 0;
139 while z < out.len() {
140 out[z] = 0;
141 z += 1;
142 }
143 let limb_shift = (shift / 128) as usize;
144 let bit = shift % 128;
145 let mut i = 0;
146 while i < a.len() {
147 let dst = i + limb_shift;
148 if dst < out.len() {
149 if bit == 0 {
150 out[dst] |= a[i];
151 } else {
152 out[dst] |= a[i] << bit;
153 if dst + 1 < out.len() {
154 out[dst + 1] |= a[i] >> (128 - bit);
155 }
156 }
157 }
158 i += 1;
159 }
160}
161
162pub(crate) const fn limbs_shr(a: &[u128], shift: u32, out: &mut [u128]) {
164 let mut z = 0;
165 while z < out.len() {
166 out[z] = 0;
167 z += 1;
168 }
169 let limb_shift = (shift / 128) as usize;
170 let bit = shift % 128;
171 let mut i = limb_shift;
172 while i < a.len() {
173 let dst = i - limb_shift;
174 if dst < out.len() {
175 if bit == 0 {
176 out[dst] |= a[i];
177 } else {
178 out[dst] |= a[i] >> bit;
179 if dst >= 1 {
180 out[dst - 1] |= a[i] << (128 - bit);
181 }
182 }
183 }
184 i += 1;
185 }
186}
187
188pub(crate) const fn limbs_mul(a: &[u128], b: &[u128], out: &mut [u128]) {
191 if a.len() == 2 && b.len() == 2 && out.len() >= 4 {
197 let (a0, a1) = (a[0], a[1]);
198 let (b0, b1) = (b[0], b[1]);
199 let (h0, l0) = mul_128(a0, b0);
202 let (h1, l1) = mul_128(a0, b1);
203 let (h2, l2) = mul_128(a1, b0);
204 let (h3, l3) = mul_128(a1, b1);
205
206 out[0] = l0;
207
208 let (s1, c1a) = h0.overflowing_add(l1);
209 let (s1, c1b) = s1.overflowing_add(l2);
210 out[1] = s1;
211 let mid_carry = (c1a as u128) + (c1b as u128);
212
213 let (s2, c2a) = h1.overflowing_add(h2);
214 let (s2, c2b) = s2.overflowing_add(l3);
215 let (s2, c2c) = s2.overflowing_add(mid_carry);
216 out[2] = s2;
217 let top_carry = (c2a as u128) + (c2b as u128) + (c2c as u128);
218
219 out[3] = h3.wrapping_add(top_carry);
220 return;
221 }
222
223 let mut i = 0;
224 while i < a.len() {
225 if a[i] != 0 {
226 let mut carry = 0u128;
227 let mut j = 0;
228 while j < b.len() {
229 if b[j] != 0 || carry != 0 {
236 let (hi, lo) = mul_128(a[i], b[j]);
237 let idx = i + j;
238 let (s1, c1) = out[idx].overflowing_add(lo);
239 let (s2, c2) = s1.overflowing_add(carry);
240 out[idx] = s2;
241 carry = hi + (c1 as u128) + (c2 as u128);
242 }
243 j += 1;
244 }
245 let mut idx = i + b.len();
246 while carry != 0 && idx < out.len() {
247 let (s, c) = out[idx].overflowing_add(carry);
248 out[idx] = s;
249 carry = c as u128;
250 idx += 1;
251 }
252 }
253 i += 1;
254 }
255}
256
257const KARATSUBA_MIN: usize = 32;
269
270#[cfg(feature = "alloc")]
280pub(crate) fn limbs_mul_fast(a: &[u128], b: &[u128], out: &mut [u128]) {
281 if a.len() == b.len() && a.len() >= KARATSUBA_MIN {
282 limbs_mul_karatsuba(a, b, out);
283 } else {
284 limbs_mul(a, b, out);
285 }
286}
287
288#[cfg(not(feature = "alloc"))]
289pub(crate) fn limbs_mul_fast(a: &[u128], b: &[u128], out: &mut [u128]) {
290 limbs_mul(a, b, out);
291}
292
293#[cfg(feature = "alloc")]
308fn limbs_mul_karatsuba(a: &[u128], b: &[u128], out: &mut [u128]) {
309 debug_assert_eq!(a.len(), b.len());
310 debug_assert!(out.len() >= 2 * a.len());
311 let n = a.len();
312 if n < KARATSUBA_MIN {
313 for o in out.iter_mut().take(2 * n) {
315 *o = 0;
316 }
317 limbs_mul(a, b, out);
318 return;
319 }
320 let h = n / 2;
321 let (a_lo, a_hi_full) = a.split_at(h);
322 let (b_lo, b_hi_full) = b.split_at(h);
323 let a_hi = a_hi_full;
324 let b_hi = b_hi_full;
325
326 let mut z0 = alloc::vec![0u128; 2 * h];
328 limbs_mul_karatsuba_padded(a_lo, b_lo, &mut z0);
329
330 let hi_len = n - h;
332 let mut z2 = alloc::vec![0u128; 2 * hi_len];
333 limbs_mul_karatsuba_padded(a_hi, b_hi, &mut z2);
334
335 let sum_len = core::cmp::max(h, hi_len) + 1;
337 let mut sum_a = alloc::vec![0u128; sum_len];
338 let mut sum_b = alloc::vec![0u128; sum_len];
339 sum_a[..h].copy_from_slice(a_lo);
340 sum_b[..h].copy_from_slice(b_lo);
341 limbs_add_assign(&mut sum_a[..], a_hi);
342 limbs_add_assign(&mut sum_b[..], b_hi);
343
344 let mut z1 = alloc::vec![0u128; 2 * sum_len];
346 limbs_mul_karatsuba_padded(&sum_a, &sum_b, &mut z1);
347
348 limbs_sub_assign(&mut z1[..], &z0);
350 limbs_sub_assign(&mut z1[..], &z2);
352
353 for o in out.iter_mut().take(2 * n) {
356 *o = 0;
357 }
358 let z0_take = core::cmp::min(z0.len(), out.len());
359 out[..z0_take].copy_from_slice(&z0[..z0_take]);
360 let z2_take = core::cmp::min(z2.len(), out.len().saturating_sub(2 * h));
361 if z2_take > 0 {
362 out[2 * h..2 * h + z2_take].copy_from_slice(&z2[..z2_take]);
363 }
364 let z1_take = core::cmp::min(z1.len(), out.len().saturating_sub(h));
366 if z1_take > 0 {
367 limbs_add_assign(&mut out[h..h + z1_take], &z1[..z1_take]);
368 }
369}
370
371#[cfg(feature = "alloc")]
375fn limbs_mul_karatsuba_padded(a: &[u128], b: &[u128], out: &mut [u128]) {
376 if a.len() == b.len() && a.len() >= KARATSUBA_MIN {
377 limbs_mul_karatsuba(a, b, out);
378 } else {
379 for o in out.iter_mut() {
380 *o = 0;
381 }
382 limbs_mul(a, b, out);
383 }
384}
385
386#[inline]
389const fn limbs_shl1(a: &mut [u128]) -> u128 {
390 let mut carry = 0u128;
391 let mut i = 0;
392 while i < a.len() {
393 let new_carry = a[i] >> 127;
394 a[i] = (a[i] << 1) | carry;
395 carry = new_carry;
396 i += 1;
397 }
398 carry
399}
400
401#[inline]
404const fn limbs_fit_one(a: &[u128]) -> bool {
405 let mut i = 1;
406 while i < a.len() {
407 if a[i] != 0 {
408 return false;
409 }
410 i += 1;
411 }
412 true
413}
414
415pub(crate) const fn limbs_divmod(
430 num: &[u128],
431 den: &[u128],
432 quot: &mut [u128],
433 rem: &mut [u128],
434) {
435 let mut z = 0;
436 while z < quot.len() {
437 quot[z] = 0;
438 z += 1;
439 }
440 z = 0;
441 while z < rem.len() {
442 rem[z] = 0;
443 z += 1;
444 }
445
446 let den_one_limb = limbs_fit_one(den);
447
448 if den_one_limb && limbs_fit_one(num) {
450 if !quot.is_empty() {
451 quot[0] = num[0] / den[0];
452 }
453 if !rem.is_empty() {
454 rem[0] = num[0] % den[0];
455 }
456 return;
457 }
458
459 if den_one_limb && den[0] <= u64::MAX as u128 {
463 let d = den[0];
464 let mut r: u128 = 0;
465 let mut top = num.len();
470 while top > 0 && num[top - 1] == 0 {
471 top -= 1;
472 }
473 let mut i = top;
474 while i > 0 {
475 i -= 1;
476 let hi = num[i] >> 64;
477 let acc_hi = (r << 64) | hi;
478 let q_hi = acc_hi / d;
479 r = acc_hi % d;
480 let lo = num[i] & u64::MAX as u128;
481 let acc_lo = (r << 64) | lo;
482 let q_lo = acc_lo / d;
483 r = acc_lo % d;
484 if i < quot.len() {
485 quot[i] = (q_hi << 64) | q_lo;
486 }
487 }
488 if !rem.is_empty() {
489 rem[0] = r;
490 }
491 return;
492 }
493
494 let bits = limbs_bit_len(num);
497 let mut i = bits;
498 while i > 0 {
499 i -= 1;
500 limbs_shl1(rem);
501 let bit = (num[(i / 128) as usize] >> (i % 128)) & 1;
502 rem[0] |= bit;
503 limbs_shl1(quot);
504 if limbs_cmp(rem, den) >= 0 {
505 limbs_sub_assign(rem, den);
506 quot[0] |= 1;
507 }
508 }
509}
510
511const SCRATCH_LIMBS: usize = 72;
515
516pub(crate) fn limbs_divmod_dispatch(
536 num: &[u128],
537 den: &[u128],
538 quot: &mut [u128],
539 rem: &mut [u128],
540) {
541 const BZ_THRESHOLD: usize = 8;
542
543 let mut n = den.len();
544 while n > 0 && den[n - 1] == 0 {
545 n -= 1;
546 }
547 assert!(n > 0, "limbs_divmod_dispatch: divide by zero");
548
549 let mut top = num.len();
550 while top > 0 && num[top - 1] == 0 {
551 top -= 1;
552 }
553
554 if n == 1 && top <= 1 {
556 limbs_divmod(num, den, quot, rem);
557 return;
558 }
559
560 if n == 1 && den[0] <= u64::MAX as u128 {
567 limbs_divmod(num, den, quot, rem);
568 return;
569 }
570
571 if n >= BZ_THRESHOLD && top >= 2 * n {
575 limbs_divmod_bz(num, den, quot, rem);
576 } else {
577 limbs_divmod_knuth(num, den, quot, rem);
578 }
579}
580
581#[derive(Clone, Copy)]
598pub(crate) struct MG2by1 {
599 d: u128,
601 v: u128,
603}
604
605impl MG2by1 {
606 #[inline]
616 pub(crate) const fn new(d: u128) -> Self {
617 debug_assert!(d >> 127 == 1, "MG2by1::new: divisor must be normalised");
618 let (v, _r) = div_2_by_1(!d, u128::MAX, d);
619 Self { d, v }
620 }
621
622 #[inline]
638 pub(crate) const fn div_rem(&self, u1: u128, u0: u128) -> (u128, u128) {
639 debug_assert!(u1 < self.d, "MG2by1::div_rem: high word must be < divisor");
640 let (vu1_hi, vu1_lo) = mul_128(self.v, u1);
642 let (q0, c_lo) = vu1_lo.overflowing_add(u0);
643 let (q1, _c_hi_a) = vu1_hi.overflowing_add(u1);
644 let (q1, _c_hi_b) = q1.overflowing_add(c_lo as u128);
645 let q1 = q1.wrapping_add(1);
647 let r = u0.wrapping_sub(q1.wrapping_mul(self.d));
649 let (q1, r) = if r > q0 {
651 (q1.wrapping_sub(1), r.wrapping_add(self.d))
652 } else {
653 (q1, r)
654 };
655 if r >= self.d {
657 (q1.wrapping_add(1), r.wrapping_sub(self.d))
658 } else {
659 (q1, r)
660 }
661 }
662}
663
664#[inline]
674const fn div_2_by_1(high: u128, low: u128, d: u128) -> (u128, u128) {
675 let mut q: u128 = 0;
682 let mut r = high;
683 let mut i = 128;
684 while i > 0 {
685 i -= 1;
686 let r_top = r >> 127;
687 r = (r << 1) | ((low >> i) & 1);
688 q <<= 1;
689 if r_top != 0 || r >= d {
692 r = r.wrapping_sub(d);
693 q |= 1;
694 }
695 }
696 (q, r)
697}
698
699pub(crate) fn limbs_divmod_knuth(
724 num: &[u128],
725 den: &[u128],
726 quot: &mut [u128],
727 rem: &mut [u128],
728) {
729 for q in quot.iter_mut() {
730 *q = 0;
731 }
732 for r in rem.iter_mut() {
733 *r = 0;
734 }
735
736 let mut n = den.len();
738 while n > 0 && den[n - 1] == 0 {
739 n -= 1;
740 }
741 assert!(n > 0, "limbs_divmod_knuth: divide by zero");
742
743 let mut top = num.len();
744 while top > 0 && num[top - 1] == 0 {
745 top -= 1;
746 }
747 if top < n {
748 let copy_n = num.len().min(rem.len());
750 let mut i = 0;
751 while i < copy_n {
752 rem[i] = num[i];
753 i += 1;
754 }
755 return;
756 }
757
758 let shift = den[n - 1].leading_zeros();
762
763 let mut u = [0u128; SCRATCH_LIMBS];
764 let mut v = [0u128; SCRATCH_LIMBS];
765 debug_assert!(top < SCRATCH_LIMBS && n <= SCRATCH_LIMBS);
766
767 if shift == 0 {
768 u[..top].copy_from_slice(&num[..top]);
769 u[top] = 0;
770 v[..n].copy_from_slice(&den[..n]);
771 } else {
772 let mut carry: u128 = 0;
773 for i in 0..top {
774 let val = num[i];
775 u[i] = (val << shift) | carry;
776 carry = val >> (128 - shift);
777 }
778 u[top] = carry;
779 carry = 0;
780 for i in 0..n {
781 let val = den[i];
782 v[i] = (val << shift) | carry;
783 carry = val >> (128 - shift);
784 }
785 }
786
787 let m_plus_n = if u[top] != 0 { top + 1 } else { top };
788 debug_assert!(m_plus_n >= n);
789 let m = m_plus_n - n;
790
791 let mg_top = MG2by1::new(v[n - 1]);
797
798 let mut j_plus_one = m + 1;
800 while j_plus_one > 0 {
801 j_plus_one -= 1;
802 let j = j_plus_one;
803
804 let u_top = u[j + n];
806 let u_next = u[j + n - 1];
807 let v_top = v[n - 1];
808
809 let (mut q_hat, mut r_hat) = if u_top >= v_top {
810 let q = u128::MAX;
819 let (r, of) = u_next.overflowing_add(v_top);
820 if of || u_top > v_top {
823 (q, u128::MAX) } else {
825 (q, r)
826 }
827 } else {
828 mg_top.div_rem(u_top, u_next)
829 };
830
831 if n >= 2 {
833 let v_below = v[n - 2];
834 loop {
835 let (hi, lo) = mul_128(q_hat, v_below);
836 let rhs_lo = u[j + n - 2];
837 let rhs_hi = r_hat;
838 if hi < rhs_hi || (hi == rhs_hi && lo <= rhs_lo) {
840 break;
841 }
842 q_hat = q_hat.wrapping_sub(1);
843 let (new_r, of) = r_hat.overflowing_add(v_top);
844 if of {
845 break;
846 }
847 r_hat = new_r;
848 }
849 }
850
851 let mut mul_carry: u128 = 0;
853 let mut borrow: u128 = 0;
854 for i in 0..n {
855 let (hi, lo) = mul_128(q_hat, v[i]);
856 let (prod_lo, c1) = lo.overflowing_add(mul_carry);
857 let new_mul_carry = hi + u128::from(c1);
858 let (s1, b1) = u[j + i].overflowing_sub(prod_lo);
859 let (s2, b2) = s1.overflowing_sub(borrow);
860 u[j + i] = s2;
861 borrow = u128::from(b1) + u128::from(b2);
862 mul_carry = new_mul_carry;
863 }
864 let (s1, b1) = u[j + n].overflowing_sub(mul_carry);
865 let (s2, b2) = s1.overflowing_sub(borrow);
866 u[j + n] = s2;
867 let final_borrow = u128::from(b1) + u128::from(b2);
868
869 if final_borrow != 0 {
872 q_hat = q_hat.wrapping_sub(1);
873 let mut carry: u128 = 0;
874 for i in 0..n {
875 let (s1, c1) = u[j + i].overflowing_add(v[i]);
876 let (s2, c2) = s1.overflowing_add(carry);
877 u[j + i] = s2;
878 carry = u128::from(c1) + u128::from(c2);
879 }
880 u[j + n] = u[j + n].wrapping_add(carry);
882 }
883
884 if j < quot.len() {
885 quot[j] = q_hat;
886 }
887 }
888
889 if shift == 0 {
891 let copy_n = n.min(rem.len());
892 rem[..copy_n].copy_from_slice(&u[..copy_n]);
893 } else {
894 for i in 0..n {
895 if i < rem.len() {
896 let lo = u[i] >> shift;
897 let hi_into_lo = if i + 1 < n {
898 u[i + 1] << (128 - shift)
899 } else {
900 0
901 };
902 rem[i] = lo | hi_into_lo;
903 }
904 }
905 }
906}
907
908pub(crate) fn limbs_divmod_bz(
926 num: &[u128],
927 den: &[u128],
928 quot: &mut [u128],
929 rem: &mut [u128],
930) {
931 const BZ_THRESHOLD: usize = 8;
932
933 let mut n = den.len();
934 while n > 0 && den[n - 1] == 0 {
935 n -= 1;
936 }
937 assert!(n > 0, "limbs_divmod_bz: divide by zero");
938
939 let mut top = num.len();
940 while top > 0 && num[top - 1] == 0 {
941 top -= 1;
942 }
943
944 if n < BZ_THRESHOLD || top < 2 * n {
945 limbs_divmod_knuth(num, den, quot, rem);
947 return;
948 }
949
950 for q in quot.iter_mut() {
960 *q = 0;
961 }
962 for r in rem.iter_mut() {
963 *r = 0;
964 }
965
966 let chunks = top.div_ceil(n);
969 let mut carry = [0u128; SCRATCH_LIMBS];
970 let mut buf = [0u128; SCRATCH_LIMBS];
971 let mut q_chunk = [0u128; SCRATCH_LIMBS];
972 let mut r_chunk = [0u128; SCRATCH_LIMBS];
973
974 let mut idx = chunks;
975 while idx > 0 {
976 idx -= 1;
977 let lo = idx * n;
978 let hi = ((idx + 1) * n).min(top);
979 buf.fill(0);
982 let chunk_len = hi - lo;
983 buf[..chunk_len].copy_from_slice(&num[lo..lo + chunk_len]);
984 buf[chunk_len..chunk_len + n].copy_from_slice(&carry[..n]);
985 let buf_len = chunk_len + n;
986 limbs_divmod_knuth(
988 &buf[..buf_len],
989 &den[..n],
990 &mut q_chunk[..buf_len],
991 &mut r_chunk[..n],
992 );
993 let store_end = (lo + n).min(quot.len());
995 let store_len = store_end.saturating_sub(lo);
996 quot[lo..lo + store_len].copy_from_slice(&q_chunk[..store_len]);
997 carry[..n].copy_from_slice(&r_chunk[..n]);
999 }
1000 let rem_n = n.min(rem.len());
1001 rem[..rem_n].copy_from_slice(&carry[..rem_n]);
1002}
1003
1004pub(crate) fn limbs_isqrt(n: &[u128], out: &mut [u128]) {
1007 for o in out.iter_mut() {
1008 *o = 0;
1009 }
1010 let bits = limbs_bit_len(n);
1011 if bits == 0 {
1012 return;
1013 }
1014 if bits <= 1 {
1015 out[0] = 1;
1016 return;
1017 }
1018 let work = n.len() + 1;
1019 debug_assert!(work <= SCRATCH_LIMBS, "wide-int isqrt scratch overflow");
1020 let mut x = [0u128; SCRATCH_LIMBS];
1021 let e = bits.div_ceil(2);
1022 x[(e / 128) as usize] |= 1u128 << (e % 128);
1023 loop {
1024 let mut q = [0u128; SCRATCH_LIMBS];
1025 let mut r = [0u128; SCRATCH_LIMBS];
1026 limbs_divmod(n, &x[..work], &mut q[..work], &mut r[..work]);
1027 limbs_add_assign(&mut q[..work], &x[..work]);
1028 let mut y = [0u128; SCRATCH_LIMBS];
1029 limbs_shr(&q[..work], 1, &mut y[..work]);
1030 if limbs_cmp(&y[..work], &x[..work]) >= 0 {
1031 break;
1032 }
1033 x = y;
1034 }
1035 let copy_len = if out.len() < work { out.len() } else { work };
1036 out[..copy_len].copy_from_slice(&x[..copy_len]);
1037}
1038
1039fn limbs_div_small(limbs: &mut [u128], radix: u128) -> u128 {
1042 let mut rem = 0u128;
1043 for limb in limbs.iter_mut().rev() {
1044 let hi = (*limb) >> 64;
1045 let lo = (*limb) & u128::from(u64::MAX);
1046 let acc_hi = (rem << 64) | hi;
1047 let q_hi = acc_hi / radix;
1048 let r1 = acc_hi % radix;
1049 let acc_lo = (r1 << 64) | lo;
1050 let q_lo = acc_lo / radix;
1051 rem = acc_lo % radix;
1052 *limb = (q_hi << 64) | q_lo;
1053 }
1054 rem
1055}
1056
1057pub(crate) fn limbs_fmt_into<'a>(
1061 limbs: &[u128],
1062 radix: u128,
1063 lower: bool,
1064 buf: &'a mut [u8],
1065) -> &'a str {
1066 let digits: &[u8] = if lower {
1067 b"0123456789abcdef"
1068 } else {
1069 b"0123456789ABCDEF"
1070 };
1071 if limbs_is_zero(limbs) {
1072 let last = buf.len() - 1;
1073 buf[last] = b'0';
1074 return core::str::from_utf8(&buf[last..]).unwrap();
1075 }
1076 let mut work = [0u128; SCRATCH_LIMBS];
1077 work[..limbs.len()].copy_from_slice(limbs);
1078 let wl = limbs.len();
1079 let mut pos = buf.len();
1080 while !limbs_is_zero(&work[..wl]) {
1081 let r = limbs_div_small(&mut work[..wl], radix);
1082 pos -= 1;
1083 buf[pos] = digits[r as usize];
1084 }
1085 core::str::from_utf8(&buf[pos..]).unwrap()
1086}
1087
1088#[inline]
1108pub(crate) const fn limbs_is_zero_u64(a: &[u64]) -> bool {
1109 let mut i = 0;
1110 while i < a.len() {
1111 if a[i] != 0 {
1112 return false;
1113 }
1114 i += 1;
1115 }
1116 true
1117}
1118
1119#[inline]
1122pub(crate) const fn limbs_is_zero_u64_fixed<const L: usize>(a: &[u64; L]) -> bool {
1123 let mut i = 0;
1124 while i < L {
1125 if a[i] != 0 {
1126 return false;
1127 }
1128 i += 1;
1129 }
1130 true
1131}
1132
1133#[inline]
1135pub(crate) const fn limbs_eq_u64(a: &[u64], b: &[u64]) -> bool {
1136 let n = if a.len() > b.len() { a.len() } else { b.len() };
1137 let mut i = 0;
1138 while i < n {
1139 let av = if i < a.len() { a[i] } else { 0 };
1140 let bv = if i < b.len() { b[i] } else { 0 };
1141 if av != bv {
1142 return false;
1143 }
1144 i += 1;
1145 }
1146 true
1147}
1148
1149#[inline]
1151pub(crate) const fn limbs_cmp_u64(a: &[u64], b: &[u64]) -> i32 {
1152 let n = if a.len() > b.len() { a.len() } else { b.len() };
1153 let mut i = n;
1154 while i > 0 {
1155 i -= 1;
1156 let av = if i < a.len() { a[i] } else { 0 };
1157 let bv = if i < b.len() { b[i] } else { 0 };
1158 if av < bv {
1159 return -1;
1160 }
1161 if av > bv {
1162 return 1;
1163 }
1164 }
1165 0
1166}
1167
1168#[inline]
1171pub(crate) const fn limbs_cmp_u64_fixed<const L: usize>(a: &[u64; L], b: &[u64; L]) -> i32 {
1172 let mut i = L;
1173 while i > 0 {
1174 i -= 1;
1175 if a[i] < b[i] {
1176 return -1;
1177 }
1178 if a[i] > b[i] {
1179 return 1;
1180 }
1181 }
1182 0
1183}
1184
1185#[inline]
1187pub(crate) const fn limbs_bit_len_u64(a: &[u64]) -> u32 {
1188 let mut i = a.len();
1189 while i > 0 {
1190 i -= 1;
1191 if a[i] != 0 {
1192 return (i as u32) * 64 + (64 - a[i].leading_zeros());
1193 }
1194 }
1195 0
1196}
1197
1198#[inline]
1200pub(crate) const fn limbs_bit_len_u64_fixed<const L: usize>(a: &[u64; L]) -> u32 {
1201 let mut i = L;
1202 while i > 0 {
1203 i -= 1;
1204 if a[i] != 0 {
1205 return (i as u32) * 64 + (64 - a[i].leading_zeros());
1206 }
1207 }
1208 0
1209}
1210
1211#[inline]
1213pub(crate) const fn limbs_add_assign_u64(a: &mut [u64], b: &[u64]) -> bool {
1214 let mut carry: u64 = 0;
1215 let mut i = 0;
1216 while i < a.len() {
1217 let bv = if i < b.len() { b[i] } else { 0 };
1218 let (s1, c1) = a[i].overflowing_add(bv);
1219 let (s2, c2) = s1.overflowing_add(carry);
1220 a[i] = s2;
1221 carry = (c1 as u64) + (c2 as u64);
1222 i += 1;
1223 }
1224 carry != 0
1225}
1226
1227#[inline]
1230pub(crate) const fn limbs_add_assign_u64_fixed<const L: usize>(
1231 a: &mut [u64; L],
1232 b: &[u64; L],
1233) -> bool {
1234 let mut carry: u64 = 0;
1235 let mut i = 0;
1236 while i < L {
1237 let (s1, c1) = a[i].overflowing_add(b[i]);
1238 let (s2, c2) = s1.overflowing_add(carry);
1239 a[i] = s2;
1240 carry = (c1 as u64) + (c2 as u64);
1241 i += 1;
1242 }
1243 carry != 0
1244}
1245
1246#[inline]
1248pub(crate) const fn limbs_sub_assign_u64(a: &mut [u64], b: &[u64]) -> bool {
1249 let mut borrow: u64 = 0;
1250 let mut i = 0;
1251 while i < a.len() {
1252 let bv = if i < b.len() { b[i] } else { 0 };
1253 let (d1, b1) = a[i].overflowing_sub(bv);
1254 let (d2, b2) = d1.overflowing_sub(borrow);
1255 a[i] = d2;
1256 borrow = (b1 as u64) + (b2 as u64);
1257 i += 1;
1258 }
1259 borrow != 0
1260}
1261
1262#[inline]
1264pub(crate) const fn limbs_sub_assign_u64_fixed<const L: usize>(
1265 a: &mut [u64; L],
1266 b: &[u64; L],
1267) -> bool {
1268 let mut borrow: u64 = 0;
1269 let mut i = 0;
1270 while i < L {
1271 let (d1, b1) = a[i].overflowing_sub(b[i]);
1272 let (d2, b2) = d1.overflowing_sub(borrow);
1273 a[i] = d2;
1274 borrow = (b1 as u64) + (b2 as u64);
1275 i += 1;
1276 }
1277 borrow != 0
1278}
1279
1280#[inline]
1284pub(crate) const fn limbs_shl_u64_fixed<const L: usize>(
1285 a: &[u64; L],
1286 shift: u32,
1287 out: &mut [u64; L],
1288) {
1289 let mut z = 0;
1290 while z < L {
1291 out[z] = 0;
1292 z += 1;
1293 }
1294 let limb_shift = (shift / 64) as usize;
1295 let bit = shift % 64;
1296 let mut i = 0;
1297 while i < L {
1298 let dst = i + limb_shift;
1299 if dst < L {
1300 if bit == 0 {
1301 out[dst] |= a[i];
1302 } else {
1303 out[dst] |= a[i] << bit;
1304 if dst + 1 < L {
1305 out[dst + 1] |= a[i] >> (64 - bit);
1306 }
1307 }
1308 }
1309 i += 1;
1310 }
1311}
1312
1313#[inline]
1315pub(crate) const fn limbs_shr_u64_fixed<const L: usize>(
1316 a: &[u64; L],
1317 shift: u32,
1318 out: &mut [u64; L],
1319) {
1320 let mut z = 0;
1321 while z < L {
1322 out[z] = 0;
1323 z += 1;
1324 }
1325 let limb_shift = (shift / 64) as usize;
1326 let bit = shift % 64;
1327 let mut i = limb_shift;
1328 while i < L {
1329 let dst = i - limb_shift;
1330 if dst < L {
1331 if bit == 0 {
1332 out[dst] |= a[i];
1333 } else {
1334 out[dst] |= a[i] >> bit;
1335 if dst >= 1 {
1336 out[dst - 1] |= a[i] << (64 - bit);
1337 }
1338 }
1339 }
1340 i += 1;
1341 }
1342}
1343
1344pub(crate) const fn limbs_shl_u64(a: &[u64], shift: u32, out: &mut [u64]) {
1346 let mut z = 0;
1347 while z < out.len() {
1348 out[z] = 0;
1349 z += 1;
1350 }
1351 let limb_shift = (shift / 64) as usize;
1352 let bit = shift % 64;
1353 let mut i = 0;
1354 while i < a.len() {
1355 let dst = i + limb_shift;
1356 if dst < out.len() {
1357 if bit == 0 {
1358 out[dst] |= a[i];
1359 } else {
1360 out[dst] |= a[i] << bit;
1361 if dst + 1 < out.len() {
1362 out[dst + 1] |= a[i] >> (64 - bit);
1363 }
1364 }
1365 }
1366 i += 1;
1367 }
1368}
1369
1370pub(crate) const fn limbs_shr_u64(a: &[u64], shift: u32, out: &mut [u64]) {
1372 let mut z = 0;
1373 while z < out.len() {
1374 out[z] = 0;
1375 z += 1;
1376 }
1377 let limb_shift = (shift / 64) as usize;
1378 let bit = shift % 64;
1379 let mut i = limb_shift;
1380 while i < a.len() {
1381 let dst = i - limb_shift;
1382 if dst < out.len() {
1383 if bit == 0 {
1384 out[dst] |= a[i];
1385 } else {
1386 out[dst] |= a[i] >> bit;
1387 if dst >= 1 {
1388 out[dst - 1] |= a[i] << (64 - bit);
1389 }
1390 }
1391 }
1392 i += 1;
1393 }
1394}
1395
1396#[inline]
1398const fn limbs_shl1_u64(a: &mut [u64]) -> u64 {
1399 let mut carry: u64 = 0;
1400 let mut i = 0;
1401 while i < a.len() {
1402 let new_carry = a[i] >> 63;
1403 a[i] = (a[i] << 1) | carry;
1404 carry = new_carry;
1405 i += 1;
1406 }
1407 carry
1408}
1409
1410#[inline]
1412const fn limbs_fit_one_u64(a: &[u64]) -> bool {
1413 let mut i = 1;
1414 while i < a.len() {
1415 if a[i] != 0 {
1416 return false;
1417 }
1418 i += 1;
1419 }
1420 true
1421}
1422
1423pub(crate) const fn limbs_mul_u64(a: &[u64], b: &[u64], out: &mut [u64]) {
1430 let mut i = 0;
1431 while i < a.len() {
1432 if a[i] != 0 {
1433 let mut carry: u64 = 0;
1434 let mut j = 0;
1435 while j < b.len() {
1436 if b[j] != 0 || carry != 0 {
1437 let prod = (a[i] as u128) * (b[j] as u128);
1438 let prod_lo = prod as u64;
1439 let prod_hi = (prod >> 64) as u64;
1440 let idx = i + j;
1441 let (s1, c1) = out[idx].overflowing_add(prod_lo);
1442 let (s2, c2) = s1.overflowing_add(carry);
1443 out[idx] = s2;
1444 carry = prod_hi + (c1 as u64) + (c2 as u64);
1445 }
1446 j += 1;
1447 }
1448 let mut idx = i + b.len();
1449 while carry != 0 && idx < out.len() {
1450 let (s, c) = out[idx].overflowing_add(carry);
1451 out[idx] = s;
1452 carry = c as u64;
1453 idx += 1;
1454 }
1455 }
1456 i += 1;
1457 }
1458}
1459
1460#[inline]
1471pub(crate) const fn limbs_mul_u64_fixed<const L: usize, const D: usize>(
1472 a: &[u64; L],
1473 b: &[u64; L],
1474 out: &mut [u64; D],
1475) {
1476 debug_assert!(D >= 2 * L, "limbs_mul_u64_fixed: D must be ≥ 2·L");
1477 let mut i = 0;
1478 while i < L {
1479 let ai = a[i];
1480 if ai != 0 {
1481 let mut carry: u64 = 0;
1482 let mut j = 0;
1483 while j < L {
1484 let v = (ai as u128) * (b[j] as u128)
1485 + (out[i + j] as u128)
1486 + (carry as u128);
1487 out[i + j] = v as u64;
1488 carry = (v >> 64) as u64;
1489 j += 1;
1490 }
1491 let mut idx = i + L;
1495 let mut c = carry;
1496 while c != 0 && idx < D {
1497 let v = (out[idx] as u128) + (c as u128);
1498 out[idx] = v as u64;
1499 c = (v >> 64) as u64;
1500 idx += 1;
1501 }
1502 }
1503 i += 1;
1504 }
1505}
1506
1507#[inline(always)]
1525pub(crate) const fn limbs_mul_u64_into<const L: usize, const LP1: usize>(
1526 a: &[u64; L],
1527 n: u64,
1528 out: &mut [u64; LP1],
1529) {
1530 debug_assert!(LP1 == L + 1, "limbs_mul_u64_into: LP1 must equal L + 1");
1531 let mut carry: u64 = 0;
1532 let mut i = 0;
1533 while i < L {
1534 let p = (a[i] as u128) * (n as u128)
1538 + (out[i] as u128)
1539 + (carry as u128);
1540 out[i] = p as u64;
1541 carry = (p >> 64) as u64;
1542 i += 1;
1543 }
1544 out[L] = carry;
1545}
1546
1547pub(crate) const fn limbs_divmod_u64(
1555 num: &[u64],
1556 den: &[u64],
1557 quot: &mut [u64],
1558 rem: &mut [u64],
1559) {
1560 let mut z = 0;
1561 while z < quot.len() {
1562 quot[z] = 0;
1563 z += 1;
1564 }
1565 z = 0;
1566 while z < rem.len() {
1567 rem[z] = 0;
1568 z += 1;
1569 }
1570
1571 let den_one_limb = limbs_fit_one_u64(den);
1572
1573 if den_one_limb && limbs_fit_one_u64(num) {
1575 if !quot.is_empty() {
1576 quot[0] = num[0] / den[0];
1577 }
1578 if !rem.is_empty() {
1579 rem[0] = num[0] % den[0];
1580 }
1581 return;
1582 }
1583
1584 if den_one_limb {
1590 let d = den[0];
1591 let mut r: u64 = 0;
1592 let mut top = num.len();
1593 while top > 0 && num[top - 1] == 0 {
1594 top -= 1;
1595 }
1596 let mut i = top;
1597 while i > 0 {
1598 i -= 1;
1599 let acc = ((r as u128) << 64) | (num[i] as u128);
1600 let q = (acc / (d as u128)) as u64;
1601 r = (acc % (d as u128)) as u64;
1602 if i < quot.len() {
1603 quot[i] = q;
1604 }
1605 }
1606 if !rem.is_empty() {
1607 rem[0] = r;
1608 }
1609 return;
1610 }
1611
1612 let bits = limbs_bit_len_u64(num);
1616 let mut i = bits;
1617 while i > 0 {
1618 i -= 1;
1619 limbs_shl1_u64(rem);
1620 let bit = (num[(i / 64) as usize] >> (i % 64)) & 1;
1621 rem[0] |= bit;
1622 limbs_shl1_u64(quot);
1623 if limbs_cmp_u64(rem, den) >= 0 {
1624 limbs_sub_assign_u64(rem, den);
1625 quot[0] |= 1;
1626 }
1627 }
1628}
1629
1630const SCRATCH_LIMBS_U64: usize = 288;
1636
1637pub(crate) const KARATSUBA_THRESHOLD_U64: usize = 256;
1646
1647#[cfg(feature = "alloc")]
1659pub(crate) fn limbs_mul_karatsuba_u64(a: &[u64], b: &[u64], out: &mut [u64]) {
1660 debug_assert_eq!(a.len(), b.len());
1661 debug_assert!(out.len() >= 2 * a.len());
1662 let n = a.len();
1663 if n < KARATSUBA_THRESHOLD_U64 {
1664 limbs_mul_u64(a, b, out);
1665 return;
1666 }
1667 let h = n / 2;
1668 let hi_len = n - h;
1669 let (a_lo, a_hi) = a.split_at(h);
1670 let (b_lo, b_hi) = b.split_at(h);
1671
1672 let mut z0 = alloc::vec![0u64; 2 * h];
1674 limbs_mul_karatsuba_padded_u64(a_lo, b_lo, &mut z0);
1675
1676 let mut z2 = alloc::vec![0u64; 2 * hi_len];
1678 limbs_mul_karatsuba_padded_u64(a_hi, b_hi, &mut z2);
1679
1680 let sum_len = core::cmp::max(h, hi_len) + 1;
1683 let mut sum_a = alloc::vec![0u64; sum_len];
1684 let mut sum_b = alloc::vec![0u64; sum_len];
1685 sum_a[..h].copy_from_slice(a_lo);
1686 sum_b[..h].copy_from_slice(b_lo);
1687 let _ = limbs_add_assign_u64(&mut sum_a[..], a_hi);
1688 let _ = limbs_add_assign_u64(&mut sum_b[..], b_hi);
1689
1690 let mut z1 = alloc::vec![0u64; 2 * sum_len];
1692 limbs_mul_karatsuba_padded_u64(&sum_a, &sum_b, &mut z1);
1693 let _ = limbs_sub_assign_u64(&mut z1[..], &z0);
1694 let _ = limbs_sub_assign_u64(&mut z1[..], &z2);
1695
1696 for o in out.iter_mut().take(2 * n) {
1698 *o = 0;
1699 }
1700 let z0_take = core::cmp::min(z0.len(), out.len());
1701 out[..z0_take].copy_from_slice(&z0[..z0_take]);
1702 let z2_take = core::cmp::min(z2.len(), out.len().saturating_sub(2 * h));
1703 if z2_take > 0 {
1704 out[2 * h..2 * h + z2_take].copy_from_slice(&z2[..z2_take]);
1705 }
1706 let z1_take = core::cmp::min(z1.len(), out.len().saturating_sub(h));
1707 if z1_take > 0 {
1708 let _ = limbs_add_assign_u64(&mut out[h..h + z1_take], &z1[..z1_take]);
1709 }
1710}
1711
1712#[cfg(feature = "alloc")]
1715fn limbs_mul_karatsuba_padded_u64(a: &[u64], b: &[u64], out: &mut [u64]) {
1716 if a.len() == b.len() && a.len() >= KARATSUBA_THRESHOLD_U64 {
1717 limbs_mul_karatsuba_u64(a, b, out);
1718 } else {
1719 for o in out.iter_mut() {
1720 *o = 0;
1721 }
1722 limbs_mul_u64(a, b, out);
1723 }
1724}
1725
1726pub(crate) fn limbs_mul_fast_u64(a: &[u64], b: &[u64], out: &mut [u64]) {
1730 #[cfg(feature = "alloc")]
1731 {
1732 if a.len() == b.len() && a.len() >= KARATSUBA_THRESHOLD_U64 {
1733 for o in out.iter_mut() {
1734 *o = 0;
1735 }
1736 limbs_mul_karatsuba_u64(a, b, out);
1737 return;
1738 }
1739 }
1740 limbs_mul_u64(a, b, out);
1741}
1742
1743#[derive(Clone, Copy)]
1752pub(crate) struct MG2by1U64 {
1753 d: u64,
1754 v: u64,
1755}
1756
1757impl MG2by1U64 {
1758 #[inline]
1760 pub(crate) const fn new(d: u64) -> Self {
1761 debug_assert!(d >> 63 == 1, "MG2by1U64::new: divisor must be normalised");
1762 let num = ((!d as u128) << 64) | (u64::MAX as u128);
1767 let v = (num / (d as u128)) as u64;
1768 Self { d, v }
1769 }
1770
1771 #[inline]
1773 pub(crate) const fn div_rem(&self, u1: u64, u0: u64) -> (u64, u64) {
1774 debug_assert!(u1 < self.d, "MG2by1U64::div_rem: high word must be < divisor");
1775 let q128 = (self.v as u128).wrapping_mul(u1 as u128)
1777 .wrapping_add(((u1 as u128) << 64) | (u0 as u128));
1778 let mut q1 = (q128 >> 64) as u64;
1779 let q0 = q128 as u64;
1780 q1 = q1.wrapping_add(1);
1781 let mut r = u0.wrapping_sub(q1.wrapping_mul(self.d));
1782 if r > q0 {
1783 q1 = q1.wrapping_sub(1);
1784 r = r.wrapping_add(self.d);
1785 }
1786 if r >= self.d {
1787 q1 = q1.wrapping_add(1);
1788 r = r.wrapping_sub(self.d);
1789 }
1790 (q1, r)
1791 }
1792}
1793
1794#[derive(Clone, Copy)]
1812pub(crate) struct MG3by2U64 {
1813 d1: u64,
1814 d0: u64,
1815 dinv: u64,
1817}
1818
1819impl MG3by2U64 {
1820 #[inline]
1833 pub(crate) const fn new(d1: u64, d0: u64) -> Self {
1834 debug_assert!(d1 >> 63 == 1, "MG3by2U64::new: top divisor limb must be normalised");
1835 let num = ((!d1 as u128) << 64) | (u64::MAX as u128);
1837 let mut v = (num / (d1 as u128)) as u64;
1838
1839 let mut p = d1.wrapping_mul(v).wrapping_add(d0);
1842 if p < d0 {
1843 v = v.wrapping_sub(1);
1844 let mask = if p >= d1 { u64::MAX } else { 0 };
1845 p = p.wrapping_sub(d1);
1846 v = v.wrapping_add(mask);
1847 p = p.wrapping_sub(mask & d1);
1848 }
1849
1850 let prod = (d0 as u128) * (v as u128);
1854 let t1 = (prod >> 64) as u64;
1855 let t0 = prod as u64;
1856 let (new_p, carry) = p.overflowing_add(t1);
1857 let _p_final = new_p;
1858 if carry {
1859 v = v.wrapping_sub(1);
1860 if new_p >= d1 && (new_p > d1 || t0 >= d0) {
1861 v = v.wrapping_sub(1);
1862 }
1863 }
1864
1865 Self { d1, d0, dinv: v }
1866 }
1867
1868 #[inline]
1882 pub(crate) const fn div_rem(&self, n2: u64, n1: u64, n0: u64) -> (u64, u64, u64) {
1883 debug_assert!(
1884 n2 < self.d1 || (n2 == self.d1 && n1 < self.d0),
1885 "MG3by2U64::div_rem: numerator high pair must be < divisor"
1886 );
1887
1888 let prod = (n2 as u128).wrapping_mul(self.dinv as u128)
1893 .wrapping_add(((n2 as u128) << 64) | (n1 as u128));
1894 let mut q = (prod >> 64) as u64;
1895 let q_lo = prod as u64;
1896
1897 let mut r1 = n1.wrapping_sub(q.wrapping_mul(self.d1));
1899
1900 let r256 = (((r1 as u128) << 64) | (n0 as u128))
1902 .wrapping_sub(((self.d1 as u128) << 64) | (self.d0 as u128));
1903 r1 = (r256 >> 64) as u64;
1904 let mut r0 = r256 as u64;
1905
1906 let t = (self.d0 as u128).wrapping_mul(q as u128);
1908 let r256 = (((r1 as u128) << 64) | (r0 as u128)).wrapping_sub(t);
1909 r1 = (r256 >> 64) as u64;
1910 r0 = r256 as u64;
1911
1912 q = q.wrapping_add(1);
1914
1915 let mask = if r1 >= q_lo { u64::MAX } else { 0 };
1920 q = q.wrapping_add(mask); let add = ((mask & self.d1) as u128) << 64 | ((mask & self.d0) as u128);
1922 let r256 = (((r1 as u128) << 64) | (r0 as u128)).wrapping_add(add);
1923 r1 = (r256 >> 64) as u64;
1924 r0 = r256 as u64;
1925
1926 if r1 > self.d1 || (r1 == self.d1 && r0 >= self.d0) {
1930 q = q.wrapping_add(1);
1931 let r256 = (((r1 as u128) << 64) | (r0 as u128))
1932 .wrapping_sub(((self.d1 as u128) << 64) | (self.d0 as u128));
1933 r1 = (r256 >> 64) as u64;
1934 r0 = r256 as u64;
1935 }
1936
1937 (q, r1, r0)
1938 }
1939}
1940
1941pub(crate) fn limbs_divmod_dispatch_u64(
1943 num: &[u64],
1944 den: &[u64],
1945 quot: &mut [u64],
1946 rem: &mut [u64],
1947) {
1948 const BZ_THRESHOLD_U64: usize = 16; let mut n = den.len();
1951 while n > 0 && den[n - 1] == 0 {
1952 n -= 1;
1953 }
1954 assert!(n > 0, "limbs_divmod_dispatch_u64: divide by zero");
1955
1956 let mut top = num.len();
1957 while top > 0 && num[top - 1] == 0 {
1958 top -= 1;
1959 }
1960
1961 if n == 1 {
1964 limbs_divmod_u64(num, den, quot, rem);
1965 return;
1966 }
1967
1968 if n >= BZ_THRESHOLD_U64 && top >= 2 * n {
1969 limbs_divmod_bz_u64(num, den, quot, rem);
1970 } else {
1971 limbs_divmod_knuth_u64(num, den, quot, rem);
1972 }
1973}
1974
1975pub(crate) fn limbs_divmod_knuth_u64(
1982 num: &[u64],
1983 den: &[u64],
1984 quot: &mut [u64],
1985 rem: &mut [u64],
1986) {
1987 for q in quot.iter_mut() {
1988 *q = 0;
1989 }
1990 for r in rem.iter_mut() {
1991 *r = 0;
1992 }
1993
1994 let mut n = den.len();
1995 while n > 0 && den[n - 1] == 0 {
1996 n -= 1;
1997 }
1998 assert!(n > 0, "limbs_divmod_knuth_u64: divide by zero");
1999
2000 let mut top = num.len();
2001 while top > 0 && num[top - 1] == 0 {
2002 top -= 1;
2003 }
2004 if top < n {
2005 let copy_n = num.len().min(rem.len());
2006 let mut i = 0;
2007 while i < copy_n {
2008 rem[i] = num[i];
2009 i += 1;
2010 }
2011 return;
2012 }
2013
2014 let shift = den[n - 1].leading_zeros();
2015 let mut u = [0u64; SCRATCH_LIMBS_U64];
2016 let mut v = [0u64; SCRATCH_LIMBS_U64];
2017 debug_assert!(top < SCRATCH_LIMBS_U64 && n <= SCRATCH_LIMBS_U64);
2018
2019 if shift == 0 {
2020 u[..top].copy_from_slice(&num[..top]);
2021 u[top] = 0;
2022 v[..n].copy_from_slice(&den[..n]);
2023 } else {
2024 let mut carry: u64 = 0;
2025 for i in 0..top {
2026 let val = num[i];
2027 u[i] = (val << shift) | carry;
2028 carry = val >> (64 - shift);
2029 }
2030 u[top] = carry;
2031 carry = 0;
2032 for i in 0..n {
2033 let val = den[i];
2034 v[i] = (val << shift) | carry;
2035 carry = val >> (64 - shift);
2036 }
2037 }
2038
2039 let m_plus_n = if u[top] != 0 { top + 1 } else { top };
2040 debug_assert!(m_plus_n >= n);
2041 let m = m_plus_n - n;
2042
2043 if n == 1 {
2048 limbs_divmod_u64(num, den, quot, rem);
2049 return;
2050 }
2051
2052 let v_top = v[n - 1];
2060 let v_below = v[n - 2];
2061 let mg_top = MG2by1U64::new(v_top);
2062
2063 let mut j_plus_one = m + 1;
2064 while j_plus_one > 0 {
2065 j_plus_one -= 1;
2066 let j = j_plus_one;
2067
2068 let jn = j + n;
2069 let u_top = u[jn];
2070 let u_next = u[jn - 1];
2071
2072 let (mut q_hat, mut r_hat) = if u_top > v_top {
2078 (u64::MAX, u64::MAX)
2079 } else if u_top == v_top {
2080 let (r, of) = u_next.overflowing_add(v_top);
2081 (u64::MAX, if of { u64::MAX } else { r })
2082 } else {
2083 mg_top.div_rem(u_top, u_next)
2084 };
2085
2086 loop {
2091 let prod = (q_hat as u128) * (v_below as u128);
2092 let hi = (prod >> 64) as u64;
2093 let lo = prod as u64;
2094 let rhs_lo = u[jn - 2];
2095 let rhs_hi = r_hat;
2096 if hi < rhs_hi || (hi == rhs_hi && lo <= rhs_lo) {
2097 break;
2098 }
2099 q_hat = q_hat.wrapping_sub(1);
2100 let (new_r, of) = r_hat.overflowing_add(v_top);
2101 if of {
2102 break;
2103 }
2104 r_hat = new_r;
2105 }
2106
2107 let mut mul_carry: u64 = 0;
2109 let mut borrow: u64 = 0;
2110 for i in 0..n {
2111 let prod = (q_hat as u128) * (v[i] as u128);
2112 let prod_lo = prod as u64;
2113 let prod_hi = (prod >> 64) as u64;
2114 let (s_prod, c1) = prod_lo.overflowing_add(mul_carry);
2115 let new_mul_carry = prod_hi + (c1 as u64);
2116 let (s1, b1) = u[j + i].overflowing_sub(s_prod);
2117 let (s2, b2) = s1.overflowing_sub(borrow);
2118 u[j + i] = s2;
2119 borrow = (b1 as u64) + (b2 as u64);
2120 mul_carry = new_mul_carry;
2121 }
2122 let (s1, b1) = u[j + n].overflowing_sub(mul_carry);
2123 let (s2, b2) = s1.overflowing_sub(borrow);
2124 u[j + n] = s2;
2125 let final_borrow = (b1 as u64) + (b2 as u64);
2126
2127 if final_borrow != 0 {
2128 q_hat = q_hat.wrapping_sub(1);
2129 let mut carry: u64 = 0;
2130 for i in 0..n {
2131 let (s1, c1) = u[j + i].overflowing_add(v[i]);
2132 let (s2, c2) = s1.overflowing_add(carry);
2133 u[j + i] = s2;
2134 carry = (c1 as u64) + (c2 as u64);
2135 }
2136 u[j + n] = u[j + n].wrapping_add(carry);
2137 }
2138
2139 if j < quot.len() {
2140 quot[j] = q_hat;
2141 }
2142 }
2143
2144 if shift == 0 {
2145 let copy_n = n.min(rem.len());
2146 rem[..copy_n].copy_from_slice(&u[..copy_n]);
2147 } else {
2148 for i in 0..n {
2149 if i < rem.len() {
2150 let lo = u[i] >> shift;
2151 let hi_into_lo = if i + 1 < n {
2152 u[i + 1] << (64 - shift)
2153 } else {
2154 0
2155 };
2156 rem[i] = lo | hi_into_lo;
2157 }
2158 }
2159 }
2160}
2161
2162pub(crate) fn limbs_divmod_bz_u64(
2164 num: &[u64],
2165 den: &[u64],
2166 quot: &mut [u64],
2167 rem: &mut [u64],
2168) {
2169 const BZ_THRESHOLD_U64: usize = 16;
2170
2171 let mut n = den.len();
2172 while n > 0 && den[n - 1] == 0 {
2173 n -= 1;
2174 }
2175 assert!(n > 0, "limbs_divmod_bz_u64: divide by zero");
2176
2177 let mut top = num.len();
2178 while top > 0 && num[top - 1] == 0 {
2179 top -= 1;
2180 }
2181
2182 if n < BZ_THRESHOLD_U64 || top < 2 * n {
2183 limbs_divmod_knuth_u64(num, den, quot, rem);
2184 return;
2185 }
2186
2187 for q in quot.iter_mut() {
2188 *q = 0;
2189 }
2190 for r in rem.iter_mut() {
2191 *r = 0;
2192 }
2193
2194 let chunks = top.div_ceil(n);
2195 let mut carry = [0u64; SCRATCH_LIMBS_U64];
2196 let mut buf = [0u64; SCRATCH_LIMBS_U64];
2197 let mut q_chunk = [0u64; SCRATCH_LIMBS_U64];
2198 let mut r_chunk = [0u64; SCRATCH_LIMBS_U64];
2199
2200 let mut idx = chunks;
2201 while idx > 0 {
2202 idx -= 1;
2203 let lo = idx * n;
2204 let hi = ((idx + 1) * n).min(top);
2205 buf.fill(0);
2206 let chunk_len = hi - lo;
2207 buf[..chunk_len].copy_from_slice(&num[lo..lo + chunk_len]);
2208 buf[chunk_len..chunk_len + n].copy_from_slice(&carry[..n]);
2209 let buf_len = chunk_len + n;
2210 limbs_divmod_knuth_u64(
2211 &buf[..buf_len],
2212 &den[..n],
2213 &mut q_chunk[..buf_len],
2214 &mut r_chunk[..n],
2215 );
2216 let store_end = (lo + n).min(quot.len());
2217 let store_len = store_end.saturating_sub(lo);
2218 quot[lo..lo + store_len].copy_from_slice(&q_chunk[..store_len]);
2219 carry[..n].copy_from_slice(&r_chunk[..n]);
2220 }
2221 let rem_n = n.min(rem.len());
2222 rem[..rem_n].copy_from_slice(&carry[..rem_n]);
2223}
2224
2225pub(crate) fn limbs_isqrt_u64(n: &[u64], out: &mut [u64]) {
2236 for o in out.iter_mut() {
2237 *o = 0;
2238 }
2239 let bits = limbs_bit_len_u64(n);
2240 if bits == 0 {
2241 return;
2242 }
2243 if bits <= 1 {
2244 out[0] = 1;
2245 return;
2246 }
2247 let work = n.len() + 1;
2248 debug_assert!(work <= SCRATCH_LIMBS_U64, "isqrt scratch overflow");
2249 let mut x = [0u64; SCRATCH_LIMBS_U64];
2250
2251 if bits >= 8 {
2269 let shift = bits - 64.min(bits);
2272 let limb_idx = (shift / 64) as usize;
2275 let bit_off = shift % 64;
2276 let top_u64: u64 = if bit_off == 0 {
2277 n[limb_idx]
2278 } else {
2279 let lo = n[limb_idx] >> bit_off;
2280 let hi = if limb_idx + 1 < n.len() {
2281 n[limb_idx + 1].checked_shl(64 - bit_off).unwrap_or(0)
2282 } else {
2283 0
2284 };
2285 lo | hi
2286 };
2287 let seed_f64 = (top_u64 as f64).sqrt();
2291 let (seed_f64, half_shift) = if (shift & 1) == 1 {
2295 (seed_f64 * core::f64::consts::SQRT_2, (shift - 1) / 2)
2296 } else {
2297 (seed_f64, shift / 2)
2298 };
2299 let truncated = seed_f64 as u128;
2312 let frac_nonzero = (truncated as f64) != seed_f64;
2313 let seed_int: u128 = truncated
2314 .saturating_add(if frac_nonzero { 1 } else { 0 })
2315 .saturating_add(1);
2316 let seed_limb_idx = (half_shift / 64) as usize;
2320 let seed_bit_off = half_shift % 64;
2321 let shifted: u128 = seed_int << seed_bit_off;
2322 let seed_lo = shifted as u64;
2323 let seed_hi = (shifted >> 64) as u64;
2324 if seed_limb_idx < work {
2325 x[seed_limb_idx] |= seed_lo;
2326 }
2327 if seed_limb_idx + 1 < work {
2328 x[seed_limb_idx + 1] |= seed_hi;
2329 }
2330 if limbs_is_zero_u64(&x[..work]) {
2334 x[0] = 1;
2335 }
2336 } else {
2337 let e = bits.div_ceil(2);
2339 x[(e / 64) as usize] |= 1u64 << (e % 64);
2340 }
2341
2342 loop {
2343 let mut q = [0u64; SCRATCH_LIMBS_U64];
2344 let mut r = [0u64; SCRATCH_LIMBS_U64];
2345 limbs_divmod_dispatch_u64(n, &x[..work], &mut q[..work], &mut r[..work]);
2346 limbs_add_assign_u64(&mut q[..work], &x[..work]);
2347 let mut y = [0u64; SCRATCH_LIMBS_U64];
2348 limbs_shr_u64(&q[..work], 1, &mut y[..work]);
2349 if limbs_cmp_u64(&y[..work], &x[..work]) >= 0 {
2350 break;
2351 }
2352 x = y;
2353 }
2354 let copy_len = if out.len() < work { out.len() } else { work };
2355 out[..copy_len].copy_from_slice(&x[..copy_len]);
2356}
2357
2358fn limbs_div_small_u64(limbs: &mut [u64], radix: u64) -> u64 {
2361 let mut rem: u64 = 0;
2362 for limb in limbs.iter_mut().rev() {
2363 let acc = ((rem as u128) << 64) | (*limb as u128);
2364 *limb = (acc / (radix as u128)) as u64;
2365 rem = (acc % (radix as u128)) as u64;
2366 }
2367 rem
2368}
2369
2370pub(crate) fn limbs_fmt_into_u64<'a>(
2372 limbs: &[u64],
2373 radix: u64,
2374 lower: bool,
2375 buf: &'a mut [u8],
2376) -> &'a str {
2377 let digits: &[u8] = if lower {
2378 b"0123456789abcdef"
2379 } else {
2380 b"0123456789ABCDEF"
2381 };
2382 if limbs_is_zero_u64(limbs) {
2383 let last = buf.len() - 1;
2384 buf[last] = b'0';
2385 return core::str::from_utf8(&buf[last..]).unwrap();
2386 }
2387 let mut work = [0u64; SCRATCH_LIMBS_U64];
2388 work[..limbs.len()].copy_from_slice(limbs);
2389 let wl = limbs.len();
2390 let mut pos = buf.len();
2391 while !limbs_is_zero_u64(&work[..wl]) {
2392 let r = limbs_div_small_u64(&mut work[..wl], radix);
2393 pos -= 1;
2394 buf[pos] = digits[r as usize];
2395 }
2396 core::str::from_utf8(&buf[pos..]).unwrap()
2397}
2398
2399#[inline]
2401pub(crate) const fn scmp_u64(a_neg: bool, a: &[u64], b_neg: bool, b: &[u64]) -> i32 {
2402 match (a_neg, b_neg) {
2403 (true, false) => -1,
2404 (false, true) => 1,
2405 _ => limbs_cmp_u64(a, b),
2406 }
2407}
2408
2409mod macros;
2414use macros::decl_wide_int;
2415
2416
2417#[inline]
2420pub(crate) const fn scmp(a_neg: bool, a: &[u128], b_neg: bool, b: &[u128]) -> i32 {
2421 match (a_neg, b_neg) {
2422 (true, false) => -1,
2423 (false, true) => 1,
2424 _ => limbs_cmp(a, b),
2425 }
2426}
2427
2428pub(crate) trait WideInt: Copy {
2435 const U128_LIMBS: usize = 1;
2444
2445 fn to_mag_sign(self) -> ([u64; 288], bool);
2448 fn from_mag_sign(mag: &[u64], negative: bool) -> Self;
2451
2452 #[inline]
2464 fn mag_into_u128(self, dst: &mut [u128]) -> bool {
2465 let (mag, neg) = self.to_mag_sign();
2466 let n_pairs = (288 / 2).min(dst.len());
2467 let mut i = 0;
2468 while i < n_pairs {
2469 dst[i] = (mag[2 * i] as u128) | ((mag[2 * i + 1] as u128) << 64);
2470 i += 1;
2471 }
2472 while i < dst.len() {
2473 dst[i] = 0;
2474 i += 1;
2475 }
2476 neg
2477 }
2478
2479 #[inline]
2485 fn from_mag_sign_u128(mag: &[u128], negative: bool) -> Self {
2486 let mut out = [0u64; 288];
2487 let n_pairs = mag.len().min(288 / 2);
2488 let mut i = 0;
2489 while i < n_pairs {
2490 out[2 * i] = mag[i] as u64;
2491 out[2 * i + 1] = (mag[i] >> 64) as u64;
2492 i += 1;
2493 }
2494 Self::from_mag_sign(&out, negative)
2495 }
2496}
2497
2498macro_rules! impl_wideint_signed_prim {
2503 ($($t:ty),*) => {$(
2504 impl WideInt for $t {
2505 #[inline]
2506 fn to_mag_sign(self) -> ([u64; 288], bool) {
2507 let mut out = [0u64; 288];
2508 let mag = self.unsigned_abs() as u128;
2509 out[0] = mag as u64;
2510 out[1] = (mag >> 64) as u64;
2511 (out, self < 0)
2512 }
2513 #[inline]
2514 fn from_mag_sign(mag: &[u64], negative: bool) -> $t {
2515 let lo = mag.first().copied().unwrap_or(0) as u128;
2516 let hi = mag.get(1).copied().unwrap_or(0) as u128;
2517 let combined = lo | (hi << 64);
2518 let m = combined as $t;
2519 if negative { m.wrapping_neg() } else { m }
2520 }
2521 }
2522 )*};
2523}
2524impl_wideint_signed_prim!(i8, i16, i32, i64, i128);
2525
2526impl WideInt for u128 {
2527 #[inline]
2528 fn to_mag_sign(self) -> ([u64; 288], bool) {
2529 let mut out = [0u64; 288];
2530 out[0] = self as u64;
2531 out[1] = (self >> 64) as u64;
2532 (out, false)
2533 }
2534 #[inline]
2535 fn from_mag_sign(mag: &[u64], _negative: bool) -> u128 {
2536 let lo = mag.first().copied().unwrap_or(0) as u128;
2537 let hi = mag.get(1).copied().unwrap_or(0) as u128;
2538 lo | (hi << 64)
2539 }
2540}
2541
2542#[inline]
2545pub(crate) fn wide_cast<S: WideInt, T: WideInt>(src: S) -> T {
2546 let (mag, negative) = src.to_mag_sign();
2547 T::from_mag_sign(&mag, negative)
2548}
2549
2550pub(crate) trait WideStorage:
2558 Copy
2559 + PartialEq
2560 + PartialOrd
2561 + WideInt
2562 + ::core::ops::Add<Output = Self>
2563 + ::core::ops::Sub<Output = Self>
2564 + ::core::ops::Mul<Output = Self>
2565 + ::core::ops::Div<Output = Self>
2566 + ::core::ops::Rem<Output = Self>
2567 + ::core::ops::Neg<Output = Self>
2568 + ::core::ops::Shl<u32, Output = Self>
2569 + ::core::ops::Shr<u32, Output = Self>
2570{
2571 const BITS: u32;
2573 const ZERO: Self;
2575 const ONE: Self;
2577 const TEN: Self;
2580
2581 fn pow(self, exp: u32) -> Self;
2583 fn isqrt(self) -> Self;
2585 fn resize_to<T: WideStorage>(self) -> T;
2587 fn leading_zeros(self) -> u32;
2589
2590 fn div_rem(self, rhs: Self) -> (Self, Self);
2592 fn bit(self, idx: u32) -> bool;
2594 fn from_i128(v: i128) -> Self;
2596 fn checked_mul_u64(self, n: u64) -> Self;
2599 fn to_f64(self) -> f64;
2601 fn from_f64_val(v: f64) -> Self;
2603}
2604
2605decl_wide_int!(Uint192, Int192, 3, 6, 4);
2611decl_wide_int!(Uint256, Int256, 4, 8, 5);
2612decl_wide_int!(Uint384, Int384, 6, 12, 7);
2613decl_wide_int!(Uint512, Int512, 8, 16, 9);
2614decl_wide_int!(Uint768, Int768, 12, 24, 13);
2615decl_wide_int!(Uint1024, Int1024, 16, 32, 17);
2616decl_wide_int!(Uint1536, Int1536, 24, 48, 25);
2617decl_wide_int!(Uint2048, Int2048, 32, 64, 33);
2618decl_wide_int!(Uint3072, Int3072, 48, 96, 49);
2619decl_wide_int!(Uint4096, Int4096, 64, 128, 65);
2620decl_wide_int!(Uint6144, Int6144, 96, 192, 97);
2621decl_wide_int!(Uint8192, Int8192, 128, 256, 129);
2622decl_wide_int!(Uint12288, Int12288, 192, 384, 193);
2623decl_wide_int!(Uint16384, Int16384, 256, 512, 257);
2624
2625macro_rules! impl_wide_storage {
2629 ($($S:ty),* $(,)?) => {$(
2630 impl WideStorage for $S {
2631 const BITS: u32 = <$S>::BITS;
2632 const ZERO: Self = <$S>::ZERO;
2633 const ONE: Self = <$S>::ONE;
2634 const TEN: Self = <$S>::from_i128(10);
2635
2636 #[inline]
2637 fn pow(self, exp: u32) -> Self {
2638 <$S>::pow(self, exp)
2639 }
2640 #[inline]
2641 fn isqrt(self) -> Self {
2642 <$S>::isqrt(self)
2643 }
2644 #[inline]
2645 fn resize_to<T: WideStorage>(self) -> T {
2646 <$S>::resize::<T>(self)
2649 }
2650 #[inline]
2651 fn leading_zeros(self) -> u32 {
2652 <$S>::leading_zeros(self)
2653 }
2654 #[inline]
2655 fn div_rem(self, rhs: Self) -> (Self, Self) {
2656 <$S>::div_rem(self, rhs)
2657 }
2658 #[inline]
2659 fn bit(self, idx: u32) -> bool {
2660 <$S>::bit(self, idx)
2661 }
2662 #[inline]
2663 fn from_i128(v: i128) -> Self {
2664 <$S>::from_i128(v)
2665 }
2666 #[inline]
2667 fn checked_mul_u64(self, n: u64) -> Self {
2668 <$S>::checked_mul_u64(self, n)
2669 }
2670 #[inline]
2671 fn to_f64(self) -> f64 {
2672 <$S>::as_f64(self)
2673 }
2674 #[inline]
2675 fn from_f64_val(v: f64) -> Self {
2676 <$S>::from_f64(v)
2677 }
2678 }
2679 )*};
2680}
2681
2682impl_wide_storage!(
2683 Int192, Int256, Int384, Int512, Int768, Int1024, Int1536, Int2048,
2684 Int3072, Int4096, Int6144, Int8192, Int12288, Int16384,
2685);
2686
2687#[cfg(any(feature = "d57", feature = "wide"))]
2697pub(crate) use self::{Int192 as I192, Uint192 as U192};
2698#[cfg(any(feature = "d57", feature = "d76", feature = "wide"))]
2699pub(crate) use self::Int384 as I384;
2700#[cfg(any(feature = "d115", feature = "wide"))]
2701pub(crate) use self::Uint384 as U384;
2702#[cfg(any(feature = "d76", feature = "wide"))]
2703pub(crate) use self::{Int256 as I256, Uint256 as U256};
2704#[cfg(any(feature = "d76", feature = "d115", feature = "d153", feature = "wide"))]
2705pub(crate) use self::Int512 as I512;
2706#[cfg(any(feature = "d153", feature = "wide"))]
2707pub(crate) use self::Uint512 as U512;
2708#[cfg(any(feature = "d115", feature = "d153", feature = "d230", feature = "wide"))]
2709pub(crate) use self::Int768 as I768;
2710#[cfg(any(feature = "d230", feature = "wide"))]
2711pub(crate) use self::Uint768 as U768;
2712#[cfg(any(feature = "d153", feature = "d230", feature = "d307", feature = "wide", feature = "x-wide"))]
2713pub(crate) use self::Int1024 as I1024;
2714#[cfg(any(feature = "d230", feature = "d307", feature = "d462", feature = "wide", feature = "x-wide"))]
2715pub(crate) use self::Int1536 as I1536;
2716#[cfg(any(feature = "d462", feature = "x-wide"))]
2717pub(crate) use self::Uint1536 as U1536;
2718#[cfg(any(feature = "d307", feature = "d462", feature = "d616", feature = "wide", feature = "x-wide"))]
2719pub(crate) use self::{Int2048 as I2048, Uint1024 as U1024};
2720#[cfg(any(feature = "d616", feature = "x-wide"))]
2721pub(crate) use self::Uint2048 as U2048;
2722#[cfg(any(feature = "d462", feature = "d616", feature = "d924", feature = "x-wide", feature = "xx-wide"))]
2723pub(crate) use self::Int3072 as I3072;
2724#[cfg(any(feature = "d924", feature = "xx-wide"))]
2725pub(crate) use self::Uint3072 as U3072;
2726#[cfg(any(feature = "d616", feature = "d924", feature = "d1232", feature = "x-wide", feature = "xx-wide"))]
2727pub(crate) use self::Int4096 as I4096;
2728#[cfg(any(feature = "d1232", feature = "xx-wide"))]
2729pub(crate) use self::Uint4096 as U4096;
2730#[cfg(any(feature = "d924", feature = "d1232", feature = "xx-wide"))]
2731pub(crate) use self::Int6144 as I6144;
2732#[cfg(any(feature = "d1232", feature = "xx-wide"))]
2733pub(crate) use self::Int8192 as I8192;
2734#[cfg(any(feature = "d924", feature = "xx-wide"))]
2735#[allow(unused_imports)]
2736pub(crate) use self::Int12288 as I12288;
2737#[cfg(any(feature = "d1232", feature = "xx-wide"))]
2738#[allow(unused_imports)]
2739pub(crate) use self::Int16384 as I16384;
2740
2741#[cfg(test)]
2742mod karatsuba_tests {
2743 use super::*;
2744
2745 #[test]
2748 fn karatsuba_matches_schoolbook_at_n16() {
2749 let a: [u128; 16] = core::array::from_fn(|i| (i as u128) * 0xdead_beef + 1);
2750 let b: [u128; 16] = core::array::from_fn(|i| 0xcafe_babe ^ ((i as u128) << 5));
2751 let mut s = [0u128; 32];
2752 let mut k = [0u128; 32];
2753 limbs_mul(&a, &b, &mut s);
2754 limbs_mul_karatsuba(&a, &b, &mut k);
2755 assert_eq!(s, k);
2756 }
2757
2758 #[test]
2759 fn karatsuba_matches_schoolbook_at_n32() {
2760 let a: [u128; 32] = core::array::from_fn(|i| (i as u128).wrapping_mul(0x1234_5678_9abc));
2761 let b: [u128; 32] = core::array::from_fn(|i| (i as u128 + 1).wrapping_mul(0xfedc_ba98));
2762 let mut s = [0u128; 64];
2763 let mut k = [0u128; 64];
2764 limbs_mul(&a, &b, &mut s);
2765 limbs_mul_karatsuba(&a, &b, &mut k);
2766 assert_eq!(s, k);
2767 }
2768
2769 #[test]
2770 fn karatsuba_handles_zero_inputs() {
2771 let a = [0u128; 16];
2772 let b: [u128; 16] = core::array::from_fn(|i| (i as u128) + 1);
2773 let mut k = [0u128; 32];
2774 limbs_mul_karatsuba(&a, &b, &mut k);
2775 for o in &k {
2776 assert_eq!(*o, 0);
2777 }
2778 }
2779}
2780
2781#[cfg(test)]
2782mod hint_tests {
2783 use super::*;
2784
2785 #[test]
2786 fn signed_add_sub_neg() {
2787 let a = Int256::from_i128(5);
2788 let b = Int256::from_i128(3);
2789 assert_eq!(a.wrapping_add(b), Int256::from_i128(8));
2790 assert_eq!(a.wrapping_sub(b), Int256::from_i128(2));
2791 assert_eq!(b.wrapping_sub(a), Int256::from_i128(-2));
2792 assert_eq!(a.negate(), Int256::from_i128(-5));
2793 assert_eq!(Int256::ZERO.negate(), Int256::ZERO);
2794 }
2795
2796 #[test]
2797 fn signed_mul_div_rem() {
2798 let six = Int512::from_i128(6);
2799 let two = Int512::from_i128(2);
2800 let three = Int512::from_i128(3);
2801 assert_eq!(six.wrapping_mul(three), Int512::from_i128(18));
2802 assert_eq!(six.wrapping_div(two), three);
2803 assert_eq!(Int512::from_i128(7).wrapping_rem(three), Int512::from_i128(1));
2804 assert_eq!(Int512::from_i128(-7).wrapping_rem(three), Int512::from_i128(-1));
2805 assert_eq!(six.negate().wrapping_mul(three), Int512::from_i128(-18));
2806 }
2807
2808 #[test]
2809 fn checked_overflow() {
2810 assert_eq!(Int256::MAX.checked_add(Int256::ONE), None);
2811 assert_eq!(Int256::MIN.checked_sub(Int256::ONE), None);
2812 assert_eq!(Int256::MIN.checked_neg(), None);
2813 assert_eq!(
2814 Int256::from_i128(2).checked_add(Int256::from_i128(3)),
2815 Some(Int256::from_i128(5))
2816 );
2817 }
2818
2819 #[test]
2820 fn from_str_and_pow() {
2821 let ten = Int1024::from_str_radix("10", 10).unwrap();
2822 assert_eq!(ten, Int1024::from_i128(10));
2823 assert_eq!(ten.pow(3), Int1024::from_i128(1000));
2824 let big = Int1024::from_str_radix("10", 10).unwrap().pow(40);
2825 let from_str = Int1024::from_str_radix(
2826 "10000000000000000000000000000000000000000",
2827 10,
2828 )
2829 .unwrap();
2830 assert_eq!(big, from_str);
2831 assert_eq!(Int256::from_str_radix("-42", 10).unwrap(), Int256::from_i128(-42));
2832 }
2833
2834 #[test]
2835 fn ordering_and_resize() {
2836 assert!(Int256::from_i128(-1) < Int256::ZERO);
2837 assert!(Int256::MIN < Int256::MAX);
2838 let v = Int256::from_i128(-123_456_789);
2839 let wide: Int1024 = v.resize();
2840 let back: Int256 = wide.resize();
2841 assert_eq!(back, v);
2842 assert_eq!(wide, Int1024::from_i128(-123_456_789));
2843 }
2844
2845 #[test]
2846 fn isqrt_and_f64() {
2847 assert_eq!(Int512::from_i128(144).isqrt(), Int512::from_i128(12));
2848 assert_eq!(Int256::from_i128(1_000_000).as_f64(), 1_000_000.0);
2849 assert_eq!(Int256::from_f64(-2_500.0), Int256::from_i128(-2500));
2850 }
2851
2852 #[test]
2857 fn uint256_is_zero_and_bit_helpers() {
2858 let zero = Uint256::ZERO;
2859 let one = Uint256::from_str_radix("1", 10).unwrap();
2860 let two = Uint256::from_str_radix("2", 10).unwrap();
2861 assert!(zero.is_zero());
2862 assert!(!one.is_zero());
2863 assert!(one.is_power_of_two());
2864 assert!(two.is_power_of_two());
2865 let three = Uint256::from_str_radix("3", 10).unwrap();
2866 assert!(!three.is_power_of_two());
2867 assert_eq!(zero.next_power_of_two(), one);
2869 assert_eq!(one.next_power_of_two(), one);
2871 let four = Uint256::from_str_radix("4", 10).unwrap();
2873 assert_eq!(three.next_power_of_two(), four);
2874 assert_eq!(zero.count_ones(), 0);
2876 assert_eq!(one.count_ones(), 1);
2877 assert_eq!(zero.leading_zeros(), Uint256::BITS);
2878 assert_eq!(one.leading_zeros(), Uint256::BITS - 1);
2879 }
2880
2881 #[test]
2882 fn uint256_parse_arithmetic_and_pow() {
2883 assert!(Uint256::from_str_radix("10", 2).is_err());
2885 assert!(Uint256::from_str_radix("1a", 10).is_err());
2887 let two = Uint256::from_str_radix("2", 10).unwrap();
2889 let three = Uint256::from_str_radix("3", 10).unwrap();
2890 let six = Uint256::from_str_radix("6", 10).unwrap();
2891 let seven = Uint256::from_str_radix("7", 10).unwrap();
2892 assert_eq!(three - two, Uint256::from_str_radix("1", 10).unwrap());
2893 assert_eq!(six / two, three);
2894 assert_eq!(seven % three, Uint256::from_str_radix("1", 10).unwrap());
2895 let five = Uint256::from_str_radix("5", 10).unwrap(); let four = Uint256::from_str_radix("4", 10).unwrap(); let one = Uint256::from_str_radix("1", 10).unwrap(); assert_eq!(five & four, four); assert_eq!(five | one, five); assert_eq!(five ^ four, one); let p10 = two.pow(10);
2904 assert_eq!(p10, Uint256::from_str_radix("1024", 10).unwrap());
2905 let signed = three.cast_signed();
2907 assert_eq!(signed, Int256::from_i128(3));
2908 }
2909
2910 #[test]
2913 fn signed_bit_and_trailing_zeros() {
2914 let v = Int256::from_i128(0b1100);
2915 assert!(v.bit(2));
2916 assert!(v.bit(3));
2917 assert!(!v.bit(0));
2918 assert!(!v.bit(1));
2919 assert!(!v.bit(1000));
2921 let n = Int256::from_i128(-1);
2923 assert!(n.bit(1000));
2924 assert_eq!(Int256::from_i128(8).trailing_zeros(), 3);
2926 assert_eq!(Int256::ZERO.trailing_zeros(), Int256::BITS);
2927 }
2928}
2929
2930#[cfg(test)]
2931mod slice_tests {
2932 use super::*;
2933
2934 #[test]
2935 fn mul_and_divmod_round_trip() {
2936 let a = [123u128, 7, 0, 0];
2937 let b = [456u128, 0, 0, 0];
2938 let mut prod = [0u128; 8];
2939 limbs_mul(&a, &b, &mut prod);
2940 let mut q = [0u128; 8];
2941 let mut r = [0u128; 8];
2942 limbs_divmod(&prod, &b, &mut q, &mut r);
2943 assert_eq!(&q[..4], &a, "quotient");
2944 assert!(limbs_is_zero(&r), "remainder");
2945 }
2946
2947 #[test]
2948 fn shifts() {
2949 let a = [1u128, 0];
2950 let mut out = [0u128; 2];
2951 limbs_shl(&a, 130, &mut out);
2952 assert_eq!(out, [0, 4]);
2953 let mut back = [0u128; 2];
2954 limbs_shr(&out, 130, &mut back);
2955 assert_eq!(back, [1, 0]);
2956 }
2957
2958 #[test]
2959 fn isqrt_basic() {
2960 let n = [0u128, 0, 1, 0];
2961 let mut out = [0u128; 4];
2962 limbs_isqrt(&n, &mut out);
2963 assert_eq!(out, [0, 1, 0, 0]);
2964 let n = [144u128, 0];
2965 let mut out = [0u128; 2];
2966 limbs_isqrt(&n, &mut out);
2967 assert_eq!(out, [12, 0]);
2968 let n = [2u128, 0];
2969 let mut out = [0u128; 2];
2970 limbs_isqrt(&n, &mut out);
2971 assert_eq!(out, [1, 0]);
2972 }
2973
2974 #[test]
2975 fn add_sub_carry() {
2976 let mut a = [u128::MAX, 0];
2977 let carry = limbs_add_assign(&mut a, &[1, 0]);
2978 assert!(!carry);
2979 assert_eq!(a, [0, 1]);
2980 let borrow = limbs_sub_assign(&mut a, &[1, 0]);
2981 assert!(!borrow);
2982 assert_eq!(a, [u128::MAX, 0]);
2983 }
2984
2985 #[test]
2988 fn div_2_by_1_basics() {
2989 assert_eq!(div_2_by_1(0, 1, 1), (1, 0));
2991 assert_eq!(div_2_by_1(0, 5, 2), (2, 1));
2993 assert_eq!(div_2_by_1(3, 0, 4), (3 << 126, 0));
2995 let d = u128::MAX - 7;
2998 let (q, r) = div_2_by_1(d - 1, u128::MAX, d);
2999 let (mul_hi, mul_lo) = mul_128(q, d);
3001 let (sum_lo, c) = mul_lo.overflowing_add(r);
3002 let sum_hi = mul_hi + c as u128;
3003 assert_eq!(sum_hi, d - 1);
3004 assert_eq!(sum_lo, u128::MAX);
3005 assert!(r < d);
3006 }
3007
3008 fn pack(limbs: &[u128]) -> alloc::vec::Vec<u64> {
3012 let mut out = alloc::vec![0u64; 2 * limbs.len()];
3013 for (i, &l) in limbs.iter().enumerate() {
3014 out[2 * i] = l as u64;
3015 out[2 * i + 1] = (l >> 64) as u64;
3016 }
3017 out
3018 }
3019
3020 fn unpack(words: &[u64]) -> alloc::vec::Vec<u128> {
3022 assert!(words.len() % 2 == 0);
3023 let mut out = alloc::vec![0u128; words.len() / 2];
3024 for i in 0..out.len() {
3025 out[i] = (words[2 * i] as u128) | ((words[2 * i + 1] as u128) << 64);
3026 }
3027 out
3028 }
3029
3030 fn corpus() -> alloc::vec::Vec<alloc::vec::Vec<u128>> {
3031 alloc::vec![
3032 alloc::vec![0u128, 0, 0, 0],
3033 alloc::vec![1u128, 0, 0, 0],
3034 alloc::vec![u128::MAX, 0, 0, 0],
3035 alloc::vec![u128::MAX, u128::MAX, 0, 0],
3036 alloc::vec![u128::MAX, u128::MAX, u128::MAX, u128::MAX],
3037 alloc::vec![123u128, 456, 0, 0],
3038 alloc::vec![
3039 0x1234_5678_9abc_def0_fedc_ba98_7654_3210_u128,
3040 0xa5a5_a5a5_5a5a_5a5a_3c3c_3c3c_c3c3_c3c3,
3041 0,
3042 0,
3043 ],
3044 ]
3045 }
3046
3047 #[test]
3050 fn limbs_mul_u64_matches_u128() {
3051 for a in corpus() {
3052 for b in corpus() {
3053 let mut out128 = alloc::vec![0u128; a.len() + b.len()];
3054 limbs_mul(&a, &b, &mut out128);
3055
3056 let a64 = pack(&a);
3057 let b64 = pack(&b);
3058 let mut out64 = alloc::vec![0u64; a64.len() + b64.len()];
3059 limbs_mul_u64(&a64, &b64, &mut out64);
3060
3061 assert_eq!(unpack(&out64), out128, "limbs_mul mismatch");
3062 }
3063 }
3064 }
3065
3066 #[test]
3070 fn limbs_mul_karatsuba_u64_matches_schoolbook() {
3071 for a in corpus() {
3072 for b in corpus() {
3073 let a64 = pack(&a);
3074 let b64 = pack(&b);
3075 let n = a64.len().min(b64.len());
3076 if n < super::KARATSUBA_THRESHOLD_U64 {
3077 continue;
3078 }
3079 let mut a_buf = alloc::vec![0u64; n];
3080 let mut b_buf = alloc::vec![0u64; n];
3081 a_buf.copy_from_slice(&a64[..n]);
3082 b_buf.copy_from_slice(&b64[..n]);
3083 let mut out_school = alloc::vec![0u64; 2 * n];
3084 let mut out_kara = alloc::vec![0u64; 2 * n];
3085 limbs_mul_u64(&a_buf, &b_buf, &mut out_school);
3086 limbs_mul_karatsuba_u64(&a_buf, &b_buf, &mut out_kara);
3087 assert_eq!(out_kara, out_school, "Karatsuba mismatch at n={n}");
3088 }
3089 }
3090 }
3091
3092 #[test]
3099 fn limbs_mul_u64_fixed_matches_slice() {
3100 macro_rules! check {
3101 ($L:expr, $D:expr) => {{
3102 for a in corpus() {
3103 for b in corpus() {
3104 let a64 = pack(&a);
3105 let b64 = pack(&b);
3106 if a64.len() < $L || b64.len() < $L {
3107 continue;
3108 }
3109 let mut a_arr = [0u64; $L];
3110 let mut b_arr = [0u64; $L];
3111 a_arr.copy_from_slice(&a64[..$L]);
3112 b_arr.copy_from_slice(&b64[..$L]);
3113 let mut out_slice = alloc::vec![0u64; $D];
3114 let mut out_fixed = [0u64; $D];
3115 limbs_mul_u64(&a_arr, &b_arr, &mut out_slice);
3116 limbs_mul_u64_fixed::<$L, $D>(&a_arr, &b_arr, &mut out_fixed);
3117 assert_eq!(
3118 &out_slice[..],
3119 &out_fixed[..],
3120 "limbs_mul_u64_fixed::<{}, {}> mismatch",
3121 $L, $D
3122 );
3123 }
3124 }
3125 }};
3126 }
3127 check!(2, 4);
3128 check!(4, 8);
3129 check!(8, 16);
3130 check!(16, 32);
3131 check!(24, 48);
3132 check!(32, 64);
3133 check!(48, 96);
3134 check!(64, 128);
3135 }
3136
3137 #[test]
3145 fn limbs_mul_u64_into_matches_fixed() {
3146 let mut state: u64 = 0xDEAD_BEEF_CAFE_F00D;
3148 let mut next = || -> u64 {
3149 state = state.wrapping_add(0x9E37_79B9_7F4A_7C15);
3150 let mut z = state;
3151 z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
3152 z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
3153 z ^ (z >> 31)
3154 };
3155
3156 macro_rules! check_into {
3157 ($L:expr, $LP1:expr, $D:expr) => {{
3158 for _ in 0..1000 {
3159 let mut a = [0u64; $L];
3160 for slot in a.iter_mut() {
3161 *slot = next();
3162 }
3163 let n = next();
3164
3165 let mut out_into = [0u64; $LP1];
3166 limbs_mul_u64_into::<$L, $LP1>(&a, n, &mut out_into);
3167
3168 let mut b = [0u64; $L];
3169 b[0] = n;
3170 let mut out_fixed = [0u64; $D];
3171 limbs_mul_u64_fixed::<$L, $D>(&a, &b, &mut out_fixed);
3172
3173 assert_eq!(
3174 &out_into[..],
3175 &out_fixed[..$LP1],
3176 "limbs_mul_u64_into::<{}, {}> low limbs mismatch \
3177 (a={:?}, n={:#x})",
3178 $L, $LP1, a, n
3179 );
3180 for (k, &limb) in out_fixed[$LP1..].iter().enumerate() {
3181 assert_eq!(
3182 limb, 0,
3183 "limbs_mul_u64_fixed high limb {} not zero \
3184 — single-multiplier product must fit L+1 limbs",
3185 $LP1 + k
3186 );
3187 }
3188 }
3189 }};
3190 }
3191 check_into!(2, 3, 4);
3192 check_into!(3, 4, 6);
3193 check_into!(4, 5, 8);
3194 check_into!(6, 7, 12);
3195 check_into!(8, 9, 16);
3196 check_into!(16, 17, 32);
3197 }
3198
3199 #[test]
3201 fn limbs_divmod_u64_matches_u128() {
3202 for num in corpus() {
3203 for den in corpus() {
3204 if den.iter().all(|&x| x == 0) {
3205 continue;
3206 }
3207 let mut q128 = alloc::vec![0u128; num.len()];
3208 let mut r128 = alloc::vec![0u128; num.len()];
3209 limbs_divmod(&num, &den, &mut q128, &mut r128);
3210
3211 let n64 = pack(&num);
3212 let d64 = pack(&den);
3213 let mut q64 = alloc::vec![0u64; n64.len()];
3214 let mut r64 = alloc::vec![0u64; n64.len()];
3215 limbs_divmod_u64(&n64, &d64, &mut q64, &mut r64);
3216
3217 assert_eq!(unpack(&q64), q128, "divmod q mismatch");
3218 assert_eq!(unpack(&r64), r128, "divmod r mismatch");
3219 }
3220 }
3221 }
3222
3223 #[test]
3225 fn limbs_divmod_knuth_u64_matches_u128() {
3226 for num in corpus() {
3227 for den in corpus() {
3228 if den.iter().all(|&x| x == 0) {
3229 continue;
3230 }
3231 let mut q128 = alloc::vec![0u128; num.len()];
3232 let mut r128 = alloc::vec![0u128; num.len()];
3233 limbs_divmod_knuth(&num, &den, &mut q128, &mut r128);
3234
3235 let n64 = pack(&num);
3236 let d64 = pack(&den);
3237 let mut q64 = alloc::vec![0u64; n64.len()];
3238 let mut r64 = alloc::vec![0u64; n64.len()];
3239 limbs_divmod_knuth_u64(&n64, &d64, &mut q64, &mut r64);
3240
3241 assert_eq!(unpack(&q64), q128, "knuth q mismatch");
3242 assert_eq!(unpack(&r64), r128, "knuth r mismatch");
3243 }
3244 }
3245 }
3246
3247 #[test]
3254 fn mg3by2_u64_matches_reference() {
3255 let cases: &[(u64, u64, u64, u64, u64)] = &[
3256 (0, 0, 1, 1u64 << 63, 0),
3259 (0, 1, 0, 1u64 << 63, 0),
3260 ((1u64 << 63) - 1, u64::MAX, u64::MAX, 1u64 << 63, 1),
3261 (u64::MAX - 1, u64::MAX, u64::MAX, u64::MAX, u64::MAX),
3263 (0, 0, 1, u64::MAX, 1),
3264 (0xc0ffee, 0xdead_beef, 0xface_b00c, (1u64 << 63) | 0xc0ffee_u64, 0xdead_beef_face_b00c),
3266 (0, 1, 2, (1u64 << 63) | 1, 2),
3268 ];
3273 for &(n2, n1, n0, d1, d0) in cases {
3274 assert!(d1 >> 63 == 1, "d1 not normalised: {d1:#x}");
3275 assert!(
3276 n2 < d1 || (n2 == d1 && n1 < d0),
3277 "test precondition (n2, n1) < (d1, d0) violated"
3278 );
3279 let mg = MG3by2U64::new(d1, d0);
3280 let (q, r1, r0) = mg.div_rem(n2, n1, n0);
3281
3282 let num = alloc::vec![n0, n1, n2];
3286 let den = alloc::vec![d0, d1];
3287 let mut q_ref = alloc::vec![0u64; 3];
3288 let mut r_ref = alloc::vec![0u64; 3];
3289 limbs_divmod_u64(&num, &den, &mut q_ref, &mut r_ref);
3290
3291 assert_eq!(q_ref[0], q, "MG3by2 q mismatch for n=({n2:#x},{n1:#x},{n0:#x}) d=({d1:#x},{d0:#x})");
3292 assert_eq!(q_ref[1], 0, "MG3by2 q higher limb non-zero — precondition violated");
3293 assert_eq!(q_ref[2], 0, "MG3by2 q higher limb non-zero — precondition violated");
3294 assert_eq!(r_ref[0], r0, "MG3by2 r0 mismatch");
3295 assert_eq!(r_ref[1], r1, "MG3by2 r1 mismatch");
3296 }
3297 }
3298
3299 #[test]
3301 fn mg2by1_u64_matches_reference() {
3302 let cases: &[(u64, u64, u64)] = &[
3303 (0, 1, 1u64 << 63),
3304 (0, u64::MAX, 1u64 << 63),
3305 ((1u64 << 63) - 1, u64::MAX, 1u64 << 63),
3306 (0, 1, u64::MAX),
3307 (u64::MAX - 1, u64::MAX, u64::MAX),
3308 (12345, 67890, (1u64 << 63) | 0xdead_beef_u64),
3309 (u64::MAX - 1, 0, u64::MAX),
3310 ];
3311 for &(u1, u0, d) in cases {
3312 assert!(d >> 63 == 1);
3313 assert!(u1 < d);
3314 let mg = MG2by1U64::new(d);
3315 let (q, r) = mg.div_rem(u1, u0);
3316 let num = ((u1 as u128) << 64) | (u0 as u128);
3318 let exp_q = (num / (d as u128)) as u64;
3319 let exp_r = (num % (d as u128)) as u64;
3320 assert_eq!((q, r), (exp_q, exp_r), "MG u64 mismatch for {u1:#x}, {u0:#x}, d={d:#x}");
3321 }
3322 }
3323
3324 #[test]
3332 fn mg2by1_matches_div_2_by_1() {
3333 let cases: &[(u128, u128, u128)] = &[
3336 (0, 1, 1u128 << 127),
3338 (0, u128::MAX, 1u128 << 127),
3339 ((1u128 << 127) - 1, u128::MAX, 1u128 << 127),
3340 (0, 1, u128::MAX),
3342 (u128::MAX - 1, u128::MAX, u128::MAX),
3343 (u128::MAX - 1, u128::MAX, u128::MAX),
3346 (12345, 67890, (1u128 << 127) | 0xdead_beefu128),
3348 (u128::MAX - 1, 0, u128::MAX),
3351 (0x1234_5678_9abc_def0_u128 ^ 0xa5a5, 0xfedc_ba98_7654_3210_u128, (1u128 << 127) | 0xc0ffee_u128),
3353 ];
3354 for &(u1, u0, d) in cases {
3355 assert!(d >> 127 == 1, "test divisor not normalised: {d:#x}");
3356 assert!(u1 < d, "test precondition u1 < d violated: {u1:#x} >= {d:#x}");
3357 let (q_ref, r_ref) = div_2_by_1(u1, u0, d);
3358 let mg = MG2by1::new(d);
3359 let (q_mg, r_mg) = mg.div_rem(u1, u0);
3360 assert_eq!(
3361 (q_mg, r_mg),
3362 (q_ref, r_ref),
3363 "MG2by1 disagrees with div_2_by_1 for (u1={u1:#x}, u0={u0:#x}, d={d:#x})"
3364 );
3365 }
3366 }
3367
3368 #[test]
3373 fn knuth_matches_canonical_divmod() {
3374 let cases: &[(&[u128], &[u128])] = &[
3375 (&[42], &[7]),
3377 (&[u128::MAX, 0], &[2]),
3378 (&[1, 1, 0, 0], &[3]),
3380 (&[u128::MAX, u128::MAX, 1, 0], &[5, 9]),
3382 (&[u128::MAX, u128::MAX, u128::MAX, 0], &[1, 2, 3]),
3384 (&[100, 0, 0], &[200, 0, 1]),
3386 (
3388 &[0, 0, u128::MAX, u128::MAX],
3389 &[1, 2, u128::MAX],
3390 ),
3391 ];
3392 for (num, den) in cases {
3393 let mut q_canon = [0u128; 8];
3394 let mut r_canon = [0u128; 8];
3395 limbs_divmod(num, den, &mut q_canon, &mut r_canon);
3396 let mut q_knuth = [0u128; 8];
3397 let mut r_knuth = [0u128; 8];
3398 limbs_divmod_knuth(num, den, &mut q_knuth, &mut r_knuth);
3399 assert_eq!(q_canon, q_knuth, "quotient mismatch on {:?} / {:?}", num, den);
3400 assert_eq!(r_canon, r_knuth, "remainder mismatch on {:?} / {:?}", num, den);
3401 }
3402 }
3403
3404 #[test]
3408 fn bz_matches_canonical_divmod() {
3409 let mut num = [0u128; 16];
3412 for (i, slot) in num.iter_mut().enumerate() {
3413 *slot = (i as u128)
3414 .wrapping_mul(0x9E37_79B9_7F4A_7C15)
3415 .wrapping_add(i as u128);
3416 }
3417 let mut den = [0u128; 10];
3418 for (i, slot) in den.iter_mut().enumerate() {
3419 *slot = ((i + 1) as u128).wrapping_mul(0xBF58_476D_1CE4_E5B9);
3420 }
3421 let mut q_canon = [0u128; 16];
3422 let mut r_canon = [0u128; 16];
3423 limbs_divmod(&num, &den, &mut q_canon, &mut r_canon);
3424 let mut q_bz = [0u128; 16];
3425 let mut r_bz = [0u128; 16];
3426 limbs_divmod_bz(&num, &den, &mut q_bz, &mut r_bz);
3427 assert_eq!(q_canon, q_bz, "BZ quotient mismatch");
3428 assert_eq!(r_canon, r_bz, "BZ remainder mismatch");
3429 }
3430
3431 #[cfg(feature = "alloc")]
3435 #[test]
3436 fn fast_mul_dispatches_to_karatsuba_at_threshold() {
3437 let a: [u128; 16] = core::array::from_fn(|i| (i as u128).wrapping_mul(0xABCD) + 1);
3438 let b: [u128; 16] = core::array::from_fn(|i| (i as u128).wrapping_mul(0xBEEF) + 7);
3439 let mut fast = [0u128; 32];
3440 let mut school = [0u128; 32];
3441 limbs_mul_fast(&a, &b, &mut fast);
3442 limbs_mul(&a, &b, &mut school);
3443 assert_eq!(fast, school, "fast (Karatsuba) and schoolbook disagree");
3444 }
3445
3446 #[cfg(feature = "alloc")]
3449 #[test]
3450 fn fast_mul_falls_through_to_schoolbook_below_threshold() {
3451 let a: [u128; 8] = core::array::from_fn(|i| (i as u128).wrapping_mul(0x1234) + 1);
3452 let b: [u128; 8] = core::array::from_fn(|i| (i as u128).wrapping_mul(0x5678) + 3);
3453 let mut fast = [0u128; 16];
3454 let mut school = [0u128; 16];
3455 limbs_mul_fast(&a, &b, &mut fast);
3456 limbs_mul(&a, &b, &mut school);
3457 assert_eq!(fast, school);
3458 }
3459
3460 #[cfg(feature = "alloc")]
3466 #[test]
3467 fn karatsuba_safety_fallback_below_threshold() {
3468 let a: [u128; 4] = [123, 456, 789, 0];
3469 let b: [u128; 4] = [987, 654, 321, 0];
3470 let mut karatsuba_out = [0u128; 8];
3471 let mut school_out = [0u128; 8];
3472 limbs_mul_karatsuba(&a, &b, &mut karatsuba_out);
3473 limbs_mul(&a, &b, &mut school_out);
3474 assert_eq!(karatsuba_out, school_out);
3475 }
3476
3477 #[test]
3480 fn isqrt_one_short_circuit() {
3481 let n = [1u128, 0];
3482 let mut out = [0u128; 2];
3483 limbs_isqrt(&n, &mut out);
3484 assert_eq!(out, [1, 0]);
3485 }
3486
3487 #[test]
3490 fn isqrt_zero_short_circuit() {
3491 let n = [0u128, 0];
3492 let mut out = [0u128; 2];
3493 limbs_isqrt(&n, &mut out);
3494 assert_eq!(out, [0, 0]);
3495 }
3496
3497 #[test]
3501 fn wide_cast_into_u128_returns_first_limb() {
3502 let src = Int256::from_i128(123_456_789);
3503 let dst: u128 = wide_cast(src);
3504 assert_eq!(dst, 123_456_789);
3505 let dst: u128 = wide_cast(Int256::ZERO);
3507 assert_eq!(dst, 0);
3508 }
3509
3510 #[test]
3517 fn knuth_q_hat_cap_branch_matches_canonical() {
3518 let num: [u128; 4] = [0, 0, u128::MAX, u128::MAX >> 1];
3522 let den: [u128; 3] = [1, 2, u128::MAX >> 1];
3523 let mut q_canon = [0u128; 4];
3524 let mut r_canon = [0u128; 4];
3525 limbs_divmod(&num, &den, &mut q_canon, &mut r_canon);
3526 let mut q_knuth = [0u128; 4];
3527 let mut r_knuth = [0u128; 4];
3528 limbs_divmod_knuth(&num, &den, &mut q_knuth, &mut r_knuth);
3529 assert_eq!(q_canon, q_knuth);
3530 assert_eq!(r_canon, r_knuth);
3531 }
3532
3533 #[test]
3537 fn bz_strips_numerator_trailing_zeros() {
3538 let mut num = [0u128; 16];
3541 for slot in &mut num[..8] {
3542 *slot = 0xCAFE_F00D;
3543 }
3544 let mut den = [0u128; 10];
3545 den[0] = 7;
3546 let mut q_canon = [0u128; 16];
3547 let mut r_canon = [0u128; 16];
3548 limbs_divmod(&num, &den, &mut q_canon, &mut r_canon);
3549 let mut q_bz = [0u128; 16];
3550 let mut r_bz = [0u128; 16];
3551 limbs_divmod_bz(&num, &den, &mut q_bz, &mut r_bz);
3552 assert_eq!(q_canon, q_bz);
3553 assert_eq!(r_canon, r_bz);
3554 }
3555}