Skip to main content

malachite_nz/natural/arithmetic/
div_mod.rs

1// Copyright © 2026 Mikhail Hogrefe
2//
3// Uses code adopted from the GNU MP Library.
4//
5//      `mpn_dcpi1_div_qr`, `mpn_dcpi1_div_qr_n`, `mpn_preinv_mu_div_qr_itch`,
6//      `mpn_preinv_mu_div_qr`, `mpn_mu_div_qr_choose_in`, `mpn_mu_div_qr2`, `mpn_mu_div_qr`,
7//      `mpn_mu_div_qr_itch`, and `mpn_sbpi1_div_qr` contributed to the GNU project by Torbjörn
8//      Granlund.
9//
10//      `mpn_invertappr`, `mpn_bc_invertappr`, and `mpn_ni_invertappr` contributed to the GNU
11//      project by Marco Bodrato. The algorithm used here was inspired by ApproximateReciprocal from
12//      "Modern Computer Arithmetic", by Richard P. Brent and Paul Zimmermann. Special thanks to
13//      Paul Zimmermann for his very valuable suggestions on all the theoretical aspects during the
14//      work on this code.
15//
16//      Copyright © 1991-2018 Free Software Foundation, Inc.
17//
18// This file is part of Malachite.
19//
20// Malachite is free software: you can redistribute it and/or modify it under the terms of the GNU
21// Lesser General Public License (LGPL) as published by the Free Software Foundation; either version
22// 3 of the License, or (at your option) any later version. See <https://www.gnu.org/licenses/>.
23
24use crate::natural::InnerNatural::{Large, Small};
25use crate::natural::Natural;
26use crate::natural::arithmetic::add::{
27    limbs_add_limb_to_out, limbs_add_same_length_to_out,
28    limbs_add_same_length_with_carry_in_in_place_left, limbs_add_same_length_with_carry_in_to_out,
29    limbs_slice_add_limb_in_place, limbs_slice_add_same_length_in_place_left,
30};
31use crate::natural::arithmetic::div::{
32    limbs_div_divide_and_conquer_approx, limbs_div_schoolbook_approx,
33};
34use crate::natural::arithmetic::mul::mul_mod::{
35    limbs_mul_mod_base_pow_n_minus_1, limbs_mul_mod_base_pow_n_minus_1_next_size,
36    limbs_mul_mod_base_pow_n_minus_1_scratch_len,
37};
38use crate::natural::arithmetic::mul::{
39    limbs_mul_greater_to_out, limbs_mul_greater_to_out_scratch_len, limbs_mul_same_length_to_out,
40    limbs_mul_same_length_to_out_scratch_len, limbs_mul_to_out, limbs_mul_to_out_scratch_len,
41};
42use crate::natural::arithmetic::shl::{limbs_shl_to_out, limbs_slice_shl_in_place};
43use crate::natural::arithmetic::shr::{limbs_shr_to_out, limbs_slice_shr_in_place};
44use crate::natural::arithmetic::sub::{
45    limbs_sub_greater_in_place_left, limbs_sub_limb_in_place, limbs_sub_same_length_in_place_left,
46    limbs_sub_same_length_in_place_right, limbs_sub_same_length_to_out,
47    limbs_sub_same_length_with_borrow_in_in_place_left,
48    limbs_sub_same_length_with_borrow_in_in_place_right,
49    limbs_sub_same_length_with_borrow_in_to_out,
50};
51use crate::natural::arithmetic::sub_mul::limbs_sub_mul_limb_same_length_in_place_left;
52use crate::natural::comparison::cmp::limbs_cmp_same_length;
53use crate::natural::logic::not::limbs_not_to_out;
54use crate::platform::{
55    DC_DIV_QR_THRESHOLD, DC_DIVAPPR_Q_THRESHOLD, DoubleLimb, INV_MULMOD_BNM1_THRESHOLD,
56    INV_NEWTON_THRESHOLD, Limb, MAYBE_DCP1_DIVAPPR, MU_DIV_QR_SKEW_THRESHOLD, MU_DIV_QR_THRESHOLD,
57};
58use alloc::vec::Vec;
59use core::cmp::{Ordering::*, min};
60use core::mem::swap;
61use malachite_base::num::arithmetic::traits::{
62    CeilingDivAssignNegMod, CeilingDivNegMod, DivAssignMod, DivAssignRem, DivMod, DivRem,
63    WrappingAddAssign, WrappingSub, WrappingSubAssign, XMulYToZZ, XXDivModYToQR,
64};
65use malachite_base::num::basic::integers::PrimitiveInt;
66use malachite_base::num::basic::traits::{One, Zero};
67use malachite_base::num::basic::unsigneds::PrimitiveUnsigned;
68use malachite_base::num::conversion::traits::{HasHalf, JoinHalves, SplitInHalf};
69use malachite_base::num::logic::traits::LeadingZeros;
70use malachite_base::slices::{slice_move_left, slice_set_zero};
71
72// The highest bit of the input must be set.
73//
74// # Worst-case complexity
75// Constant time and additional memory.
76//
77// # Panics
78// Panics if `d` is zero.
79//
80// This is equivalent to `mpn_invert_limb`, or `invert_limb`, from `gmp-impl.h`, GMP 6.2.1.
81pub_crate_test! {limbs_invert_limb<
82    DT: From<T> + HasHalf<Half = T> + PrimitiveUnsigned + JoinHalves<Half = T> + SplitInHalf,
83    T: PrimitiveUnsigned,
84>(
85    d: T,
86) -> T {
87    (DT::join_halves(!d, T::MAX) / DT::from(d)).lower_half()
88}}
89
90// # Worst-case complexity
91// Constant time and additional memory.
92//
93// This is equivalent to `udiv_qrnnd_preinv` from `gmp-impl.h`, GMP 6.2.1.
94pub_crate_test! {div_mod_by_preinversion(
95    n_high: Limb,
96    n_low: Limb,
97    d: Limb,
98    d_inv: Limb
99) -> (Limb, Limb) {
100    let (mut q_high, q_low) = (DoubleLimb::from(n_high) * DoubleLimb::from(d_inv))
101        .wrapping_add(DoubleLimb::join_halves(n_high.wrapping_add(1), n_low))
102        .split_in_half();
103    let mut r = n_low.wrapping_sub(q_high.wrapping_mul(d));
104    if r > q_low {
105        let (r_plus_d, overflow) = r.overflowing_add(d);
106        if overflow {
107            q_high.wrapping_sub_assign(1);
108            r = r_plus_d;
109        }
110    } else if r >= d {
111        q_high.wrapping_add_assign(1);
112        r -= d;
113    }
114    (q_high, r)
115}}
116
117// Interpreting a slice of `Limb`s as the limbs (in ascending order) of a `Natural`, returns the
118// quotient limbs and remainder of the `Natural` divided by a `Limb`. The divisor limb cannot be
119// zero and the limb slice must have at least two elements.
120//
121// # Worst-case complexity
122// $T(n) = O(n)$
123//
124// $M(n) = O(n)$
125//
126// where $T$ is time, $M$ is additional memory, and $n$ is `ns.len()`.
127//
128// # Panics
129// Panics if the length of `ns` is less than 2 or if `d` is zero.
130//
131// This is equivalent to `mpn_divrem_1` from `mpn/generic/divrem_1.c`, GMP 6.2.1, where `qxn == 0`,
132// `un > 1`, and both results are returned. Experiments show that `DIVREM_1_NORM_THRESHOLD` and
133// `DIVREM_1_UNNORM_THRESHOLD` are unnecessary (they would always be 0).
134pub_test! {limbs_div_limb_mod(ns: &[Limb], d: Limb) -> (Vec<Limb>, Limb) {
135    let mut qs = vec![0; ns.len()];
136    let r = limbs_div_limb_to_out_mod(&mut qs, ns, d);
137    (qs, r)
138}}
139
140// Interpreting a slice of `Limb`s as the limbs (in ascending order) of a `Natural`, writes the
141// limbs of the quotient of the `Natural` and a `Limb` to an output slice, and returns the
142// remainder. The output slice must be at least as long as the input slice. The divisor limb cannot
143// be zero and the input limb slice must have at least two elements.
144//
145// # Worst-case complexity
146// $T(n) = O(n)$
147//
148// $M(n) = O(1)$
149//
150// where $T$ is time, $M$ is additional memory, and $n$ is `ns.len()`.
151//
152// # Panics
153// Panics if `out` is shorter than `ns`, the length of `ns` is less than 2, or if `d` is zero.
154//
155// This is equivalent to `mpn_divrem_1` from `mpn/generic/divrem_1.c`, GMP 6.2.1, where `qxn == 0`
156// and `un > 1`. Experiments show that `DIVREM_1_NORM_THRESHOLD` and `DIVREM_1_UNNORM_THRESHOLD` are
157// unnecessary (they would always be 0).
158pub_crate_test! {limbs_div_limb_to_out_mod(out: &mut [Limb], ns: &[Limb], d: Limb) -> Limb {
159    assert_ne!(d, 0);
160    let len = ns.len();
161    assert!(len > 1);
162    let out = &mut out[..len];
163    let bits = LeadingZeros::leading_zeros(d);
164    if bits == 0 {
165        // High quotient limb is 0 or 1, skip a divide step.
166        let (r, ns_init) = ns.split_last().unwrap();
167        let mut r = *r;
168        let (out_last, out_init) = out.split_last_mut().unwrap();
169        let adjust = r >= d;
170        if adjust {
171            r -= d;
172        }
173        *out_last = Limb::from(adjust);
174        // Multiply-by-inverse, divisor already normalized.
175        let d_inv = limbs_invert_limb::<DoubleLimb, Limb>(d);
176        for (out_q, &n) in out_init.iter_mut().zip(ns_init.iter()).rev() {
177            (*out_q, r) = div_mod_by_preinversion(r, n, d, d_inv);
178        }
179        r
180    } else {
181        // Skip a division if high < divisor (high quotient 0). Testing here before normalizing will
182        // still skip as often as possible.
183        let (ns_last, ns_init) = ns.split_last().unwrap();
184        let (ns, mut r) = if *ns_last < d {
185            *out.last_mut().unwrap() = 0;
186            (ns_init, *ns_last)
187        } else {
188            (ns, 0)
189        };
190        let d = d << bits;
191        r <<= bits;
192        let d_inv = limbs_invert_limb::<DoubleLimb, Limb>(d);
193        let (previous_n, ns_init) = ns.split_last().unwrap();
194        let mut previous_n = *previous_n;
195        let cobits = Limb::WIDTH - bits;
196        r |= previous_n >> cobits;
197        let (out_head, out_tail) = out.split_first_mut().unwrap();
198        for (out_q, &n) in out_tail.iter_mut().zip(ns_init.iter()).rev() {
199            let n_shifted = (previous_n << bits) | (n >> cobits);
200            (*out_q, r) = div_mod_by_preinversion(r, n_shifted, d, d_inv);
201            previous_n = n;
202        }
203        let out_r;
204        (*out_head, out_r) = div_mod_by_preinversion(r, previous_n << bits, d, d_inv);
205        out_r >> bits
206    }
207}}
208
209// Interpreting a slice of `Limb`s as the limbs (in ascending order) of a `Natural`, writes the
210// limbs of the quotient of the `Natural` and a `Limb` to the input slice and returns the remainder.
211// The divisor limb cannot be zero and the input limb slice must have at least two elements.
212//
213// # Worst-case complexity
214// $T(n) = O(n)$
215//
216// $M(n) = O(1)$
217//
218// where $T$ is time, $M$ is additional memory, and $n$ is `ns.len()`.
219//
220// # Panics
221// Panics if the length of `ns` is less than 2 or if `d` is zero.
222//
223// This is equivalent to `mpn_divrem_1` from `mpn/generic/divrem_1.c`, GMP 6.2.1, where `qp == up`,
224// `qxn == 0`, and `un > 1`. Experiments show that `DIVREM_1_NORM_THRESHOLD` and
225// `DIVREM_1_UNNORM_THRESHOLD` are unnecessary (they would always be 0).
226pub_crate_test! {limbs_div_limb_in_place_mod(ns: &mut [Limb], d: Limb) -> Limb {
227    assert_ne!(d, 0);
228    let len = ns.len();
229    assert!(len > 1);
230    let bits = LeadingZeros::leading_zeros(d);
231    let (ns_last, ns_init) = ns.split_last_mut().unwrap();
232    if bits == 0 {
233        // High quotient limb is 0 or 1, skip a divide step.
234        let mut r = *ns_last;
235        let adjust = r >= d;
236        if adjust {
237            r -= d;
238        }
239        *ns_last = Limb::from(adjust);
240        // Multiply-by-inverse, divisor already normalized.
241        let d_inv = limbs_invert_limb::<DoubleLimb, Limb>(d);
242        for n in ns_init.iter_mut().rev() {
243            (*n, r) = div_mod_by_preinversion(r, *n, d, d_inv);
244        }
245        r
246    } else {
247        // Skip a division if high < divisor (high quotient 0). Testing here before normalizing will
248        // still skip as often as possible.
249        let (ns, mut r) = if *ns_last < d {
250            let r = *ns_last;
251            *ns_last = 0;
252            (ns_init, r)
253        } else {
254            (ns, 0)
255        };
256        let d = d << bits;
257        r <<= bits;
258        let d_inv = limbs_invert_limb::<DoubleLimb, Limb>(d);
259        let last_index = ns.len() - 1;
260        let mut previous_n = ns[last_index];
261        let cobits = Limb::WIDTH - bits;
262        r |= previous_n >> cobits;
263        for i in (0..last_index).rev() {
264            let n = ns[i];
265            let shifted_n = (previous_n << bits) | (n >> cobits);
266            (ns[i + 1], r) = div_mod_by_preinversion(r, shifted_n, d, d_inv);
267            previous_n = n;
268        }
269        let out_r;
270        (ns[0], out_r) = div_mod_by_preinversion(r, previous_n << bits, d, d_inv);
271        out_r >> bits
272    }
273}}
274
275// Let `ns` be the limbs of a `Natural` $n$, and let $f$ be `fraction_len`. This function performs
276// the integer division $B^fn / d$, writing the `ns.len() + fraction_len` limbs of the quotient to
277// `out` and returning the remainder.
278//
279// `shift` must be the number of leading zeros of `d`, and `d_inv` must be `limbs_invert_limb(d <<
280// shift)`.
281//
282// # Worst-case complexity
283// $T(n) = O(n)$
284//
285// $M(n) = O(1)$
286//
287// where $T$ is time, $M$ is additional memory, and $n$ is `ns.len() + fraction_len`.
288//
289// # Panics
290// Panics if `out` is shorter than `ns.len()` + `fraction_len`, if `ns` is empty, or if `d` is zero.
291//
292// This is equivalent to `mpn_preinv_divrem_1` from `mpn/generic/pre_divrem_1.c`, GMP 6.2.1, where
293// `qp != ap`.
294pub_test! {limbs_div_mod_extra(
295    out: &mut [Limb],
296    fraction_len: usize,
297    mut ns: &[Limb],
298    d: Limb,
299    d_inv: Limb,
300    shift: u64,
301) -> Limb {
302    assert!(!ns.is_empty());
303    assert_ne!(d, 0);
304    let (ns_last, ns_init) = ns.split_last().unwrap();
305    let ns_last = *ns_last;
306    let d_norm = d << shift;
307    let (fraction_out, integer_out) = out.split_at_mut(fraction_len);
308    let mut integer_out = &mut integer_out[..ns.len()];
309    let mut r;
310    if shift == 0 {
311        r = ns_last;
312        let q_high = r >= d_norm;
313        if r >= d_norm {
314            r -= d_norm;
315        }
316        let (integer_out_last, integer_out_init) = integer_out.split_last_mut().unwrap();
317        *integer_out_last = Limb::from(q_high);
318        for (q, &n) in integer_out_init.iter_mut().zip(ns_init.iter()).rev() {
319            (*q, r) = div_mod_by_preinversion(r, n, d_norm, d_inv);
320        }
321    } else {
322        r = 0;
323        if ns_last < d {
324            r = ns_last << shift;
325            let integer_out_last;
326            (integer_out_last, integer_out) = integer_out.split_last_mut().unwrap();
327            *integer_out_last = 0;
328            ns = ns_init;
329        }
330        if !ns.is_empty() {
331            let co_shift = Limb::WIDTH - shift;
332            let (ns_last, ns_init) = ns.split_last().unwrap();
333            let mut previous_n = *ns_last;
334            r |= previous_n >> co_shift;
335            let (integer_out_head, integer_out_tail) = integer_out.split_first_mut().unwrap();
336            for (q, &n) in integer_out_tail.iter_mut().zip(ns_init.iter()).rev() {
337                assert!(r < d_norm);
338                (*q, r) = div_mod_by_preinversion(
339                    r,
340                    (previous_n << shift) | (n >> co_shift),
341                    d_norm,
342                    d_inv,
343                );
344                previous_n = n;
345            }
346            (*integer_out_head, r) = div_mod_by_preinversion(r, previous_n << shift, d_norm, d_inv);
347        }
348    }
349    for q in fraction_out.iter_mut().rev() {
350        (*q, r) = div_mod_by_preinversion(r, 0, d_norm, d_inv);
351    }
352    r >> shift
353}}
354
355// Let `&ns[fraction_len..]` be the limbs of a `Natural` $n$, and let $f$ be `fraction_len`. This
356// function performs the integer division $B^fn / d$, writing the `ns.len() + fraction_len` limbs of
357// the quotient to `ns` and returning the remainder.
358//
359// `shift` must be the number of leading zeros of `d`, and `d_inv` must be `limbs_invert_limb(d <<
360// shift)`.
361//
362// # Worst-case complexity
363// $T(n) = O(n)$
364//
365// $M(n) = O(1)$
366//
367// where $T$ is time, $M$ is additional memory, and $n$ is `ns.len() + fraction_len`.
368//
369// # Panics
370// Panics if `ns` is empty, if `ns.len()` is less than `fraction_len`, or if `d` is zero.
371//
372// This is equivalent to `mpn_preinv_divrem_1` from `mpn/generic/pre_divrem_1.c`, GMP 6.2.1, where
373// `qp == ap`.
374pub_crate_test! {limbs_div_mod_extra_in_place(
375    ns: &mut [Limb],
376    fraction_len: usize,
377    d: Limb,
378    d_inv: Limb,
379    shift: u64,
380) -> Limb {
381    assert_ne!(d, 0);
382    let (fraction_ns, mut integer_ns) = ns.split_at_mut(fraction_len);
383    let ns_last = *integer_ns.last().unwrap();
384    let d_norm = d << shift;
385    let mut r;
386    if shift == 0 {
387        r = ns_last;
388        let q_high = r >= d_norm;
389        if r >= d_norm {
390            r -= d_norm;
391        }
392        let (integer_ns_last, integer_ns_init) = integer_ns.split_last_mut().unwrap();
393        *integer_ns_last = Limb::from(q_high);
394        for q in integer_ns_init.iter_mut().rev() {
395            (*q, r) = div_mod_by_preinversion(r, *q, d_norm, d_inv);
396        }
397    } else {
398        r = 0;
399        if ns_last < d {
400            r = ns_last << shift;
401            let integer_ns_last;
402            (integer_ns_last, integer_ns) = integer_ns.split_last_mut().unwrap();
403            *integer_ns_last = 0;
404        }
405        if !integer_ns.is_empty() {
406            let co_shift = Limb::WIDTH - shift;
407            let mut previous_n = *integer_ns.last().unwrap();
408            r |= previous_n >> co_shift;
409            for i in (1..integer_ns.len()).rev() {
410                assert!(r < d_norm);
411                let n = integer_ns[i - 1];
412                (integer_ns[i], r) = div_mod_by_preinversion(
413                    r,
414                    (previous_n << shift) | (n >> co_shift),
415                    d_norm,
416                    d_inv,
417                );
418                previous_n = n;
419            }
420            (integer_ns[0], r) = div_mod_by_preinversion(r, previous_n << shift, d_norm, d_inv);
421        }
422    }
423    for q in fraction_ns.iter_mut().rev() {
424        (*q, r) = div_mod_by_preinversion(r, 0, d_norm, d_inv);
425    }
426    r >> shift
427}}
428
429// Computes floor((B ^ 3 - 1) / (`hi` * B + `lo`)) - B, where B = 2 ^ `Limb::WIDTH`, assuming the
430// highest bit of `hi` is set.
431//
432// # Worst-case complexity
433// Constant time and additional memory.
434//
435// # Panics
436// Panics if `hi` is zero.
437//
438// This is equivalent to `invert_pi1` from `gmp-impl.h`, GMP 6.2.1, where the result is returned
439// instead of being written to `dinv`.
440pub_crate_test! {limbs_two_limb_inverse_helper(hi: Limb, lo: Limb) -> Limb {
441    let mut d_inv = limbs_invert_limb::<DoubleLimb, Limb>(hi);
442    let mut hi_product = hi.wrapping_mul(d_inv);
443    hi_product.wrapping_add_assign(lo);
444    if hi_product < lo {
445        d_inv.wrapping_sub_assign(1);
446        if hi_product >= hi {
447            hi_product.wrapping_sub_assign(hi);
448            d_inv.wrapping_sub_assign(1);
449        }
450        hi_product.wrapping_sub_assign(hi);
451    }
452    let (lo_product_hi, lo_product_lo) = Limb::x_mul_y_to_zz(lo, d_inv);
453    hi_product.wrapping_add_assign(lo_product_hi);
454    if hi_product < lo_product_hi {
455        d_inv.wrapping_sub_assign(1);
456        if hi_product > hi || hi_product == hi && lo_product_lo >= lo {
457            d_inv.wrapping_sub_assign(1);
458        }
459    }
460    d_inv
461}}
462
463// Computes the quotient and remainder of `[n_2, n_1, n_0]` / `[d_1, d_0]`. Requires the highest bit
464// of `d_1` to be set, and `[n_2, n_1]` < `[d_1, d_0]`. `d_inv` is the inverse of `[d_1, d_0]`
465// computed by `limbs_two_limb_inverse_helper`.
466//
467// # Worst-case complexity
468// Constant time and additional memory.
469//
470// This is equivalent to `udiv_qr_3by2` from `gmp-impl.h`, GMP 6.2.1.
471pub_crate_test! {limbs_div_mod_three_limb_by_two_limb(
472    n_2: Limb,
473    n_1: Limb,
474    n_0: Limb,
475    d_1: Limb,
476    d_0: Limb,
477    d_inv: Limb,
478) -> (Limb, DoubleLimb) {
479    let (mut q, q_lo) = (DoubleLimb::from(n_2) * DoubleLimb::from(d_inv))
480        .wrapping_add(DoubleLimb::join_halves(n_2, n_1))
481        .split_in_half();
482    let d = DoubleLimb::join_halves(d_1, d_0);
483    // Compute the two most significant limbs of n - q * d
484    let mut r = DoubleLimb::join_halves(n_1.wrapping_sub(d_1.wrapping_mul(q)), n_0)
485        .wrapping_sub(d)
486        .wrapping_sub(DoubleLimb::from(d_0) * DoubleLimb::from(q));
487    q.wrapping_add_assign(1);
488    // Conditionally adjust q and the remainder
489    if r.upper_half() >= q_lo {
490        let (r_plus_d, overflow) = r.overflowing_add(d);
491        if overflow {
492            q.wrapping_sub_assign(1);
493            r = r_plus_d;
494        }
495    } else if r >= d {
496        q.wrapping_add_assign(1);
497        r.wrapping_sub_assign(d);
498    }
499    (q, r)
500}}
501
502// Divides `ns` by `ds` and writes the `ns.len()` - 2 least-significant quotient limbs to `qs` and
503// the 2-long remainder to `ns`. Returns the most significant limb of the quotient; `true` means 1
504// and `false` means 0. `ds` must have length 2, `ns` must have length at least 2, and the most
505// significant bit of `ds[1]` must be set.
506//
507// # Worst-case complexity
508// $T(n) = O(n)$
509//
510// $M(n) = O(1)$
511//
512// where $T$ is time, $M$ is additional memory, and $n$ is `ns.len()`.
513//
514// # Panics
515// Panics if `ds` does not have length 2, `ns` has length less than 2, `qs` has length less than
516// `ns.len() - 2`, or `ds[1]` does not have its highest bit set.
517//
518// This is equivalent to `mpn_divrem_2` from `mpn/generic/divrem_2.c`, GMP 6.2.1.
519pub_crate_test! {limbs_div_mod_by_two_limb_normalized(
520    qs: &mut [Limb],
521    ns: &mut [Limb],
522    ds: &[Limb]
523) -> bool {
524    assert_eq!(ds.len(), 2);
525    let n_len = ns.len();
526    assert!(n_len >= 2);
527    let n_limit = n_len - 2;
528    assert!(ds[1].get_highest_bit());
529    let d_1 = ds[1];
530    let d_0 = ds[0];
531    let d = DoubleLimb::join_halves(d_1, d_0);
532    let mut r = DoubleLimb::join_halves(ns[n_limit + 1], ns[n_limit]);
533    let highest_q = r >= d;
534    if highest_q {
535        r.wrapping_sub_assign(d);
536    }
537    let (mut r_1, mut r_0) = r.split_in_half();
538    let d_inv = limbs_two_limb_inverse_helper(d_1, d_0);
539    for (&n, q) in ns[..n_limit].iter().zip(qs[..n_limit].iter_mut()).rev() {
540        let r;
541        (*q, r) = limbs_div_mod_three_limb_by_two_limb(r_1, r_0, n, d_1, d_0, d_inv);
542        (r_1, r_0) = r.split_in_half();
543    }
544    ns[1] = r_1;
545    ns[0] = r_0;
546    highest_q
547}}
548
549// Schoolbook division using the Möller-Granlund 3/2 division algorithm.
550//
551// Divides `ns` by `ds` and writes the `ns.len()` - `ds.len()` least-significant quotient limbs to
552// `qs` and the `ds.len()` limbs of the remainder to `ns`. Returns the most significant limb of the
553// quotient; `true` means 1 and `false` means 0. `ds` must have length greater than 2, `ns` must be
554// at least as long as `ds`, and the most significant bit of `ds` must be set. `d_inv` should be the
555// result of `limbs_two_limb_inverse_helper` applied to the two highest limbs of the denominator.
556//
557// # Worst-case complexity
558// $T(n, d) = O(d(n - d + 1)) = O(n^2)$
559//
560// $M(n) = O(1)$
561//
562// where $T$ is time, $M$ is additional memory, $n$ is `ns.len()`, and $d$ is `ds.len()`.
563//
564// # Panics
565// Panics if `ds` has length smaller than 3, `ns` is shorter than `ds`, `qs` has length less than
566// `ns.len()` - `ds.len()`, or the last limb of `ds` does not have its highest bit set.
567//
568// This is equivalent to `mpn_sbpi1_div_qr` from `mpn/generic/sbpi1_div_qr.c`, GMP 6.2.1.
569pub_crate_test! {limbs_div_mod_schoolbook(
570    qs: &mut [Limb],
571    ns: &mut [Limb],
572    ds: &[Limb],
573    d_inv: Limb,
574) -> bool {
575    let d_len = ds.len();
576    assert!(d_len > 2);
577    let n_len = ns.len();
578    assert!(n_len >= d_len);
579    let d_1 = ds[d_len - 1];
580    assert!(d_1.get_highest_bit());
581    let d_0 = ds[d_len - 2];
582    let ds_except_last = &ds[..d_len - 1];
583    let ds_except_last_two = &ds[..d_len - 2];
584    let ns_hi = &mut ns[n_len - d_len..];
585    let highest_q = limbs_cmp_same_length(ns_hi, ds) >= Equal;
586    if highest_q {
587        limbs_sub_same_length_in_place_left(ns_hi, ds);
588    }
589    let mut n_1 = ns[n_len - 1];
590    for i in (d_len..n_len).rev() {
591        let j = i - d_len;
592        let mut q;
593        if n_1 == d_1 && ns[i - 1] == d_0 {
594            q = Limb::MAX;
595            limbs_sub_mul_limb_same_length_in_place_left(&mut ns[j..i], ds, q);
596            n_1 = ns[i - 1]; // update n_1, last loop's value will now be invalid
597        } else {
598            let (ns_lo, ns_hi) = ns.split_at_mut(i - 2);
599            let n;
600            (q, n) = limbs_div_mod_three_limb_by_two_limb(n_1, ns_hi[1], ns_hi[0], d_1, d_0, d_inv);
601            let mut n_0;
602            (n_1, n_0) = n.split_in_half();
603            let local_carry_1 = limbs_sub_mul_limb_same_length_in_place_left(
604                &mut ns_lo[j..],
605                ds_except_last_two,
606                q,
607            );
608            let local_carry_2 = n_0 < local_carry_1;
609            n_0.wrapping_sub_assign(local_carry_1);
610            let carry = local_carry_2 && n_1 == 0;
611            if local_carry_2 {
612                n_1.wrapping_sub_assign(1);
613            }
614            ns_hi[0] = n_0;
615            if carry {
616                n_1.wrapping_add_assign(d_1);
617                if limbs_slice_add_same_length_in_place_left(&mut ns[j..i - 1], ds_except_last) {
618                    n_1.wrapping_add_assign(1);
619                }
620                q.wrapping_sub_assign(1);
621            }
622        }
623        qs[j] = q;
624    }
625    ns[d_len - 1] = n_1;
626    highest_q
627}}
628
629// # Worst-case complexity
630// $T(n) = O(n (\log n)^2 \log \log n)$
631//
632// $M(n) = O(n \log n)$
633//
634// where $T$ is time, $M$ is additional memory, and $n$ is `ds.len()`.
635//
636// This is equivalent to `mpn_dcpi1_div_qr_n` from `mpn/generic/dcpi1_div_qr.c`, GMP 6.2.1.
637pub(crate) fn limbs_div_mod_divide_and_conquer_helper(
638    qs: &mut [Limb],
639    ns: &mut [Limb],
640    ds: &[Limb],
641    d_inv: Limb,
642    scratch: &mut [Limb],
643) -> bool {
644    let n = ds.len();
645    let lo = n >> 1; // floor(n / 2)
646    let hi = n - lo; // ceil(n / 2)
647    let qs_hi = &mut qs[lo..];
648    let (ds_lo, ds_hi) = ds.split_at(lo);
649    let mut highest_q = if hi < DC_DIV_QR_THRESHOLD {
650        limbs_div_mod_schoolbook(qs_hi, &mut ns[lo << 1..n << 1], ds_hi, d_inv)
651    } else {
652        limbs_div_mod_divide_and_conquer_helper(qs_hi, &mut ns[lo << 1..], ds_hi, d_inv, scratch)
653    };
654    let qs_hi = &mut qs_hi[..hi];
655    let mut mul_scratch = vec![0; limbs_mul_greater_to_out_scratch_len(qs_hi.len(), ds_lo.len())];
656    limbs_mul_greater_to_out(scratch, qs_hi, ds_lo, &mut mul_scratch);
657    let ns_lo = &mut ns[..n + lo];
658    let mut carry = Limb::from(limbs_sub_same_length_in_place_left(
659        &mut ns_lo[lo..],
660        &scratch[..n],
661    ));
662    if highest_q && limbs_sub_same_length_in_place_left(&mut ns_lo[n..], ds_lo) {
663        carry += 1;
664    }
665    while carry != 0 {
666        if limbs_sub_limb_in_place(qs_hi, 1) {
667            assert!(highest_q);
668            highest_q = false;
669        }
670        if limbs_slice_add_same_length_in_place_left(&mut ns_lo[lo..], ds) {
671            carry -= 1;
672        }
673    }
674    let (ds_lo, ds_hi) = ds.split_at(hi);
675    let q_lo = if lo < DC_DIV_QR_THRESHOLD {
676        limbs_div_mod_schoolbook(qs, &mut ns[hi..n + lo], ds_hi, d_inv)
677    } else {
678        limbs_div_mod_divide_and_conquer_helper(qs, &mut ns[hi..], ds_hi, d_inv, scratch)
679    };
680    let qs_lo = &mut qs[..lo];
681    let ns_lo = &mut ns[..n];
682    let mut mul_scratch = vec![0; limbs_mul_greater_to_out_scratch_len(ds_lo.len(), lo)];
683    limbs_mul_greater_to_out(scratch, ds_lo, qs_lo, &mut mul_scratch);
684    let mut carry = Limb::from(limbs_sub_same_length_in_place_left(ns_lo, &scratch[..n]));
685    if q_lo && limbs_sub_same_length_in_place_left(&mut ns_lo[lo..], ds_lo) {
686        carry += 1;
687    }
688    while carry != 0 {
689        limbs_sub_limb_in_place(qs_lo, 1);
690        if limbs_slice_add_same_length_in_place_left(ns_lo, ds) {
691            carry -= 1;
692        }
693    }
694    highest_q
695}
696
697// # Worst-case complexity
698// $T(n) = O(n (\log n)^2 \log \log n)$
699//
700// $M(n) = O(n \log n)$
701//
702// where $T$ is time, $M$ is additional memory, and $n$ is `ds.len()`.
703pub_test! {limbs_div_dc_helper(
704    qs: &mut [Limb],
705    ns: &mut [Limb],
706    ds: &[Limb],
707    d_inv: Limb,
708    scratch: &mut [Limb],
709) -> bool {
710    if qs.len() < DC_DIV_QR_THRESHOLD {
711        limbs_div_mod_schoolbook(qs, ns, ds, d_inv)
712    } else {
713        limbs_div_mod_divide_and_conquer_helper(qs, ns, ds, d_inv, scratch)
714    }
715}}
716
717// Recursive divide-and-conquer division.
718//
719// # Worst-case complexity
720// $T(n) = O(n (\log n)^2 \log \log n)$
721//
722// $M(n) = O(n \log n)$
723//
724// where $T$ is time, $M$ is additional memory, and $n$ is `ds.len()`.
725//
726// # Panics
727// Panics if `ds` has length smaller than 6, `ns.len()` is less than `ds.len()` + 3, `qs` has length
728// less than `ns.len()` - `ds.len()`, or the last limb of `ds` does not have its highest bit set.
729//
730// This is equivalent to `mpn_dcpi1_div_qr` from `mpn/generic/dcpi1_div_qr.c`, GMP 6.2.1.
731pub_test! {limbs_div_mod_divide_and_conquer(
732    qs: &mut [Limb],
733    ns: &mut [Limb],
734    ds: &[Limb],
735    d_inv: Limb,
736) -> bool {
737    let n_len = ns.len();
738    let d_len = ds.len();
739    assert!(d_len >= 6); // to adhere to limbs_div_mod_schoolbook's limits
740    assert!(n_len >= d_len + 3); // to adhere to limbs_div_mod_schoolbook's limits
741    let a = d_len - 1;
742    let d_1 = ds[a];
743    let b = d_len - 2;
744    assert!(d_1.get_highest_bit());
745    let mut scratch = vec![0; d_len];
746    let mut highest_q;
747    let q_len = n_len - d_len;
748    if q_len > d_len {
749        let q_len_mod_d_len = {
750            let mut m = q_len % d_len;
751            if m == 0 {
752                m = d_len;
753            }
754            m
755        };
756        // Perform the typically smaller block first. Point at low limb of next quotient block
757        let qs_alt = &mut qs[q_len - q_len_mod_d_len..q_len];
758        if q_len_mod_d_len == 1 {
759            // Handle highest_q up front, for simplicity.
760            let ns_hi = &mut ns[q_len - 1..];
761            let ns_hi_hi = &mut ns_hi[1..];
762            highest_q = limbs_cmp_same_length(ns_hi_hi, ds) >= Equal;
763            if highest_q {
764                assert!(!limbs_sub_same_length_in_place_left(ns_hi_hi, ds));
765            }
766            // A single iteration of schoolbook: One 3/2 division, followed by the bignum update and
767            // adjustment.
768            let (last_n, ns) = ns_hi.split_last_mut().unwrap();
769            let n_2 = *last_n;
770            let mut n_1 = ns[a];
771            let mut n_0 = ns[b];
772            let d_0 = ds[b];
773            assert!(n_2 < d_1 || n_2 == d_1 && n_1 <= d_0);
774            let mut q;
775            if n_2 == d_1 && n_1 == d_0 {
776                q = Limb::MAX;
777                assert_eq!(limbs_sub_mul_limb_same_length_in_place_left(ns, ds, q), n_2);
778            } else {
779                let n;
780                (q, n) = limbs_div_mod_three_limb_by_two_limb(n_2, n_1, n_0, d_1, d_0, d_inv);
781                (n_1, n_0) = n.split_in_half();
782                // d_len > 2 because of precondition. No need to check
783                let local_carry_1 =
784                    limbs_sub_mul_limb_same_length_in_place_left(&mut ns[..b], &ds[..b], q);
785                let local_carry_2 = n_0 < local_carry_1;
786                n_0.wrapping_sub_assign(local_carry_1);
787                let carry = local_carry_2 && n_1 == 0;
788                if local_carry_2 {
789                    n_1.wrapping_sub_assign(1);
790                }
791                ns[b] = n_0;
792                let (last_n, ns) = ns.split_last_mut().unwrap();
793                if carry {
794                    n_1.wrapping_add_assign(d_1);
795                    if limbs_slice_add_same_length_in_place_left(ns, &ds[..a]) {
796                        n_1.wrapping_add_assign(1);
797                    }
798                    if q == 0 {
799                        assert!(highest_q);
800                        highest_q = false;
801                    }
802                    q.wrapping_sub_assign(1);
803                }
804                *last_n = n_1;
805            }
806            qs_alt[0] = q;
807        } else {
808            // Do a 2 * q_len_mod_d_len / q_len_mod_d_len division
809            let (ds_lo, ds_hi) = ds.split_at(d_len - q_len_mod_d_len);
810            highest_q = {
811                let ns = &mut ns[n_len - (q_len_mod_d_len << 1)..];
812                if q_len_mod_d_len == 2 {
813                    limbs_div_mod_by_two_limb_normalized(qs_alt, ns, ds_hi)
814                } else {
815                    limbs_div_dc_helper(qs_alt, ns, ds_hi, d_inv, &mut scratch)
816                }
817            };
818            if q_len_mod_d_len != d_len {
819                let mut mul_scratch =
820                    vec![0; limbs_mul_to_out_scratch_len(qs_alt.len(), ds_lo.len())];
821                limbs_mul_to_out(&mut scratch, qs_alt, ds_lo, &mut mul_scratch);
822                let ns = &mut ns[q_len - q_len_mod_d_len..n_len - q_len_mod_d_len];
823                let mut carry = Limb::from(limbs_sub_same_length_in_place_left(ns, &scratch));
824                if highest_q
825                    && limbs_sub_same_length_in_place_left(&mut ns[q_len_mod_d_len..], ds_lo)
826                {
827                    carry += 1;
828                }
829                while carry != 0 {
830                    if limbs_sub_limb_in_place(qs_alt, 1) {
831                        assert!(highest_q);
832                        highest_q = false;
833                    }
834                    if limbs_slice_add_same_length_in_place_left(ns, ds) {
835                        carry -= 1;
836                    }
837                }
838            }
839        }
840        // offset is a multiple of d_len
841        let mut offset = n_len.checked_sub(d_len + q_len_mod_d_len).unwrap();
842        while offset != 0 {
843            offset -= d_len;
844            limbs_div_mod_divide_and_conquer_helper(
845                &mut qs[offset..],
846                &mut ns[offset..],
847                ds,
848                d_inv,
849                &mut scratch,
850            );
851        }
852    } else {
853        let m = d_len - q_len;
854        let (ds_lo, ds_hi) = ds.split_at(m);
855        highest_q = if q_len < DC_DIV_QR_THRESHOLD {
856            limbs_div_mod_schoolbook(qs, &mut ns[m..], ds_hi, d_inv)
857        } else {
858            limbs_div_mod_divide_and_conquer_helper(qs, &mut ns[m..], ds_hi, d_inv, &mut scratch)
859        };
860        if m != 0 {
861            let qs = &mut qs[..q_len];
862            let ns = &mut ns[..d_len];
863            let mut mul_scratch = vec![0; limbs_mul_to_out_scratch_len(q_len, ds_lo.len())];
864            limbs_mul_to_out(&mut scratch, qs, ds_lo, &mut mul_scratch);
865            let mut carry = Limb::from(limbs_sub_same_length_in_place_left(ns, &scratch));
866            if highest_q && limbs_sub_same_length_in_place_left(&mut ns[q_len..], ds_lo) {
867                carry += 1;
868            }
869            while carry != 0 {
870                if limbs_sub_limb_in_place(qs, 1) {
871                    assert!(highest_q);
872                    highest_q = false;
873                }
874                if limbs_slice_add_same_length_in_place_left(ns, ds) {
875                    carry -= 1;
876                }
877            }
878        }
879    }
880    highest_q
881}}
882
883pub_test! {limbs_div_approx_helper(qs: &mut [Limb], ns: &mut [Limb], ds: &[Limb], d_inv: Limb) {
884    if ds.len() < DC_DIVAPPR_Q_THRESHOLD {
885        limbs_div_schoolbook_approx(qs, ns, ds, d_inv);
886    } else {
887        limbs_div_divide_and_conquer_approx(qs, ns, ds, d_inv);
888    }
889}}
890
891// Takes the strictly normalized value ds (i.e., most significant bit must be set) as an input, and
892// computes the approximate reciprocal of `ds`, with the same length as `ds`. See documentation for
893// `limbs_invert_approx` for an explanation of the return value.
894//
895// # Worst-case complexity
896// $T(n) = O(n (\log n)^2 \log \log n)$
897//
898// $M(n) = O(n \log n)$
899//
900// where $T$ is time, $M$ is additional memory, and $n$ is `ds.len()`.
901//
902// # Panics
903// Panics if `ds` is empty, `is` is shorter than `ds`, `scratch` is shorter than twice the length of
904// `ds`, or the last limb of `ds` does not have its highest bit set.
905//
906// This is equivalent to `mpn_bc_invertappr` from `mpn/generic/invertappr.c`, GMP 6.2.1, where the
907// return value is `true` iff the return value of `mpn_bc_invertappr` would be 0.
908pub_test! {limbs_invert_basecase_approx(
909    is: &mut [Limb],
910    ds: &[Limb],
911    scratch: &mut [Limb]
912) -> bool {
913    let d_len = ds.len();
914    assert_ne!(d_len, 0);
915    let highest_d = ds[d_len - 1];
916    assert!(highest_d.get_highest_bit());
917    if d_len == 1 {
918        let d = ds[0];
919        is[0] = limbs_invert_limb::<DoubleLimb, Limb>(d);
920    } else {
921        let scratch = &mut scratch[..d_len << 1];
922        let (scratch_lo, scratch_hi) = scratch.split_at_mut(d_len);
923        for s in &mut *scratch_lo {
924            *s = Limb::MAX;
925        }
926        limbs_not_to_out(scratch_hi, ds);
927        // Now scratch contains 2 ^ (2 * d_len * Limb::WIDTH) - d * 2 ^ (d_len * Limb::WIDTH) - 1
928        if d_len == 2 {
929            limbs_div_mod_by_two_limb_normalized(is, scratch, ds);
930        } else {
931            let d_inv = limbs_two_limb_inverse_helper(highest_d, ds[d_len - 2]);
932            if MAYBE_DCP1_DIVAPPR {
933                limbs_div_approx_helper(is, scratch, ds, d_inv);
934            } else {
935                limbs_div_schoolbook_approx(is, scratch, ds, d_inv);
936            }
937            assert!(!limbs_sub_limb_in_place(&mut is[..d_len], 1));
938            return false;
939        }
940    }
941    true
942}}
943
944// Takes the strictly normalized value ds (i.e., most significant bit must be set) as an input, and
945// computes the approximate reciprocal of `ds`, with the same length as `ds`. See documentation for
946// `limbs_invert_approx` for an explanation of the return value.
947//
948// Uses Newton's iterations (at least one). Inspired by Algorithm "ApproximateReciprocal", published
949// in "Modern Computer Arithmetic" by Richard P. Brent and Paul Zimmermann, algorithm 3.5, page 121
950// in version 0.4 of the book.
951//
952// Some adaptations were introduced, to allow product mod B ^ m - 1 and return the value e.
953//
954// We introduced a correction in such a way that "the value of B ^ {n + h} - T computed at step 8
955// cannot exceed B ^ n - 1" (the book reads "2 * B ^ n - 1").
956//
957// Maximum scratch needed by this branch <= 2 * n, but have to fit 3 * rn in the scratch, i.e. 3 *
958// rn <= 2 * n: we require n > 4.
959//
960// We use a wrapped product modulo B ^ m - 1.
961//
962// # Worst-case complexity
963// $T(n) = O(n \log n \log \log n)$
964//
965// $M(n) = O(n \log n)$
966//
967// where $T$ is time, $M$ is additional memory, and $n$ is `is.len()`.
968//
969// # Panics
970// Panics if `ds` has length less than 5, `is` is shorter than `ds`, `scratch` is shorter than twice
971// the length of `ds`, or the last limb of `ds` does not have its highest bit set.
972//
973// This is equivalent to `mpn_ni_invertappr` from `mpn/generic/invertappr.c`, GMP 6.2.1, where the
974// return value is `true` iff the return value of `mpn_ni_invertappr` would be 0.
975pub_test! {limbs_invert_newton_approx(is: &mut [Limb], ds: &[Limb], scratch: &mut [Limb]) -> bool {
976    let d_len = ds.len();
977    assert!(d_len > 4);
978    assert!(ds[d_len - 1].get_highest_bit());
979    let is = &mut is[..d_len];
980    // Compute the computation precisions from highest to lowest, leaving the base case size in
981    // 'previous_d'.
982    let mut size = d_len;
983    let mut sizes = vec![size];
984    size = (size >> 1) + 1;
985    let mut scratch2 = vec![];
986    let mut mul_size = 0;
987    if d_len >= INV_MULMOD_BNM1_THRESHOLD {
988        mul_size = limbs_mul_mod_base_pow_n_minus_1_next_size(d_len + 1);
989        scratch2 = vec![0; limbs_mul_mod_base_pow_n_minus_1_scratch_len(mul_size, d_len, size)];
990    }
991    while size >= INV_NEWTON_THRESHOLD {
992        sizes.push(size);
993        size = (size >> 1) + 1;
994    }
995    // We compute the inverse of 0.ds as 1.is. Compute a base value of previous_d limbs.
996    limbs_invert_basecase_approx(&mut is[d_len - size..], &ds[d_len - size..], scratch);
997    let mut previous_size = size;
998    let mut a = 0;
999    // Use Newton's iterations to get the desired precision.
1000    for &size in sizes.iter().rev() {
1001        // ```
1002        // v    d       v
1003        // +----+-------+
1004        // ^ previous_d ^
1005        // ```
1006        //
1007        // Compute i_j * d
1008        let ds_hi = &ds[d_len - size..];
1009        let condition = size < INV_MULMOD_BNM1_THRESHOLD || {
1010            mul_size = limbs_mul_mod_base_pow_n_minus_1_next_size(size + 1);
1011            mul_size > size + previous_size
1012        };
1013        let diff = size - previous_size;
1014        let is_hi = &mut is[d_len - previous_size..];
1015        if condition {
1016            let mut mul_scratch =
1017                vec![0; limbs_mul_greater_to_out_scratch_len(ds_hi.len(), is_hi.len())];
1018            limbs_mul_greater_to_out(scratch, ds_hi, is_hi, &mut mul_scratch);
1019            limbs_slice_add_same_length_in_place_left(
1020                &mut scratch[previous_size..=size],
1021                &ds_hi[..=diff],
1022            );
1023        } else {
1024            // Remember we truncated mod B ^ (d + 1) We computed (truncated) xp of length d + 1 <-
1025            // 1.is * 0.ds Use B ^ mul_size - 1 wraparound
1026            limbs_mul_mod_base_pow_n_minus_1(scratch, mul_size, ds_hi, is_hi, &mut scratch2);
1027            let scratch = &mut scratch[..=mul_size];
1028            // We computed {xp, mul_size} <- {is, previous_d} * {ds, d} mod (B ^ mul_size - 1) We
1029            // know that 2 * |is * ds + ds * B ^ previous_d - B ^ {previous_d + d}| < B ^ mul_size
1030            // - 1 Add ds * B ^ previous_d mod (B ^ mul_size - 1)
1031            let mul_diff = mul_size - previous_size;
1032            assert!(size >= mul_diff);
1033            let (ds_hi_lo, ds_hi_hi) = ds_hi.split_at(mul_diff);
1034            let carry = limbs_slice_add_same_length_in_place_left(
1035                &mut scratch[previous_size..mul_size],
1036                ds_hi_lo,
1037            );
1038            // Subtract B ^ {previous_d + d}, maybe only compensate the carry
1039            scratch[mul_size] = 1; // set a limit for decrement
1040            let (scratch_lo, scratch_hi) = scratch.split_at_mut(size - mul_diff);
1041            if !limbs_add_same_length_with_carry_in_in_place_left(scratch_lo, ds_hi_hi, carry) {
1042                assert!(!limbs_sub_limb_in_place(scratch_hi, 1));
1043            }
1044            // if decrement eroded xp[mul_size]
1045            let (scratch_last, scratch_init) = scratch.split_last_mut().unwrap();
1046            assert!(!limbs_sub_limb_in_place(
1047                scratch_init,
1048                1.wrapping_sub(*scratch_last)
1049            ));
1050            // Remember we are working mod B ^ mul_size - 1
1051        }
1052        if scratch[size] < 2 {
1053            // "positive" residue class
1054            let (scratch_lo, scratch_hi) = scratch.split_at_mut(size);
1055            let mut carry = scratch_hi[0] + 1; // 1 <= carry <= 2 here.
1056            if carry == 2 && !limbs_sub_same_length_in_place_left(scratch_lo, ds_hi) {
1057                carry = 3;
1058                assert!(limbs_sub_same_length_in_place_left(scratch_lo, ds_hi));
1059            }
1060            // 1 <= carry <= 3 here.
1061            if limbs_cmp_same_length(scratch_lo, ds_hi) == Greater {
1062                assert!(!limbs_sub_same_length_in_place_left(scratch_lo, ds_hi));
1063                carry += 1;
1064            }
1065            let (scratch_lo, scratch_mid) = scratch_lo.split_at_mut(diff);
1066            let (ds_hi_lo, ds_hi_hi) = ds_hi.split_at(diff);
1067            let borrow = limbs_cmp_same_length(scratch_lo, ds_hi_lo) == Greater;
1068            assert!(!limbs_sub_same_length_with_borrow_in_to_out(
1069                &mut scratch_hi[diff..],
1070                ds_hi_hi,
1071                scratch_mid,
1072                borrow
1073            ));
1074            assert!(!limbs_sub_limb_in_place(is_hi, carry)); // 1 <= carry <= 4 here
1075        } else {
1076            // "negative" residue class
1077            assert!(scratch[size] >= Limb::MAX - 1);
1078            if condition {
1079                assert!(!limbs_sub_limb_in_place(&mut scratch[..=size], 1));
1080            }
1081            let (scratch_lo, scratch_hi) = scratch.split_at_mut(size);
1082            if scratch_hi[0] != Limb::MAX {
1083                assert!(!limbs_slice_add_limb_in_place(is_hi, 1));
1084                assert!(limbs_slice_add_same_length_in_place_left(scratch_lo, ds_hi));
1085            }
1086            limbs_not_to_out(&mut scratch_hi[diff..size], &scratch_lo[diff..]);
1087        }
1088        // Compute x_j * u_j
1089        let (scratch_lo, scratch_hi) = scratch.split_at_mut(size + diff);
1090        let mut mul_scratch = vec![0; limbs_mul_same_length_to_out_scratch_len(is_hi.len())];
1091        limbs_mul_same_length_to_out(
1092            scratch_lo,
1093            &scratch_hi[..previous_size],
1094            is_hi,
1095            &mut mul_scratch,
1096        );
1097        a = (previous_size << 1) - diff;
1098        let carry = {
1099            let (scratch_lo, scratch_hi) = scratch.split_at_mut(a);
1100            limbs_slice_add_same_length_in_place_left(
1101                &mut scratch_lo[previous_size..],
1102                &scratch_hi[3 * diff - previous_size..diff << 1],
1103            )
1104        };
1105        if limbs_add_same_length_with_carry_in_to_out(
1106            &mut is[d_len - size..],
1107            &scratch[a..previous_size << 1],
1108            &scratch[size + previous_size..size << 1],
1109            carry,
1110        ) {
1111            assert!(!limbs_slice_add_limb_in_place(
1112                &mut is[d_len - previous_size..],
1113                1
1114            ));
1115        }
1116        previous_size = size;
1117    }
1118    // Check for possible carry propagation from below. Be conservative.
1119    scratch[a - 1] <= Limb::MAX - 7
1120}}
1121
1122// Takes the strictly normalized value ds (i.e., most significant bit must be set) as an input, and
1123// computes the approximate reciprocal of `ds`, with the same length as `ds`.
1124//
1125// Let result_definitely_exact = limbs_invert_basecase_approx(is, ds, scratch) be the returned
1126// value. If result_definitely_exact is `true`, the error e is 0; otherwise, it may be 0 or 1. The
1127// following condition is satisfied by the output:
1128//
1129// ds * (2 ^ (n * Limb::WIDTH) + is) < 2 ^ (2 * n * Limb::WIDTH) <= ds * (2 ^ (n * Limb::WIDTH) + is
1130// + 1 + e), where n = `ds.len()`.
1131//
1132// When the strict result is needed, i.e., e = 0 in the relation above, the function `mpn_invert`
1133// (TODO!) should be used instead.
1134//
1135// # Worst-case complexity
1136// $T(n) = O(n \log n \log \log n)$
1137//
1138// $M(n) = O(n \log n)$
1139//
1140// where $T$ is time, $M$ is additional memory, and $n$ is `is.len()`.
1141//
1142// # Panics
1143// Panics if `ds` is empty, `is` is shorter than `ds`, `scratch` is shorter than twice the length of
1144// `ds`, or the last limb of `ds` does not have its highest bit set.
1145//
1146// This is equivalent to `mpn_invertappr` from `mpn/generic/invertappr.c`, GMP 6.2.1, where the
1147// return value is `true` iff the return value of `mpn_invertappr` would be 0.
1148pub_crate_test! {limbs_invert_approx(is: &mut [Limb], ds: &[Limb], scratch: &mut [Limb]) -> bool {
1149    if ds.len() < INV_NEWTON_THRESHOLD {
1150        limbs_invert_basecase_approx(is, ds, scratch)
1151    } else {
1152        limbs_invert_newton_approx(is, ds, scratch)
1153    }
1154}}
1155
1156// TODO tune
1157pub(crate) const MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD: usize = INV_MULMOD_BNM1_THRESHOLD >> 1;
1158
1159// # Worst-case complexity
1160// $T(n) = O(n \log n \log\log n)$
1161//
1162// $M(n) = O(n \log n)$
1163//
1164// where $T$ is time, $M$ is additional memory, and $n$ is `ds.len()`.
1165//
1166// - ds.len() >= 2
1167// - n_len >= 3
1168// - n_len >= ds.len()
1169// - i_len == limbs_div_mod_barrett_is_len(n_len - ds.len(), ds.len())
1170// - qs.len() == i_len
1171// - scratch_len ==  limbs_mul_mod_base_pow_n_minus_1_next_size(ds.len() + 1)
1172// - scratch.len() == limbs_div_mod_barrett_scratch_len(n_len, d_len) - i_len
1173// - rs_hi.len() == i_len
1174pub_crate_test! {limbs_div_barrett_large_product(
1175    scratch: &mut [Limb],
1176    ds: &[Limb],
1177    qs: &[Limb],
1178    rs_hi: &[Limb],
1179    scratch_len: usize,
1180    i_len: usize,
1181) {
1182    let d_len = ds.len();
1183    let (scratch, scratch_out) = scratch.split_at_mut(scratch_len);
1184    limbs_mul_mod_base_pow_n_minus_1(scratch, scratch_len, ds, qs, scratch_out);
1185    if d_len + i_len > scratch_len {
1186        let (rs_hi_lo, rs_hi_hi) = rs_hi.split_at(scratch_len - d_len);
1187        let carry_1 = limbs_sub_greater_in_place_left(scratch, rs_hi_hi);
1188        let carry_2 = limbs_cmp_same_length(rs_hi_lo, &scratch[d_len..]) == Less;
1189        if !carry_1 && carry_2 {
1190            assert!(!limbs_slice_add_limb_in_place(scratch, 1));
1191        } else {
1192            assert_eq!(carry_1, carry_2);
1193        }
1194    }
1195}}
1196
1197// # Worst-case complexity
1198// $T(n, d) = O(n \log d \log\log d)$
1199//
1200// $M(n) = O(d \log d)$
1201//
1202// where $T$ is time, $M$ is additional memory, $n$ is `ns.len()`, and $d$ is `ds.len()`.
1203//
1204// This is equivalent to `mpn_preinv_mu_div_qr` from `mpn/generic/mu_div_qr.c`, GMP 6.2.1.
1205fn limbs_div_mod_barrett_preinverted(
1206    qs: &mut [Limb],
1207    rs: &mut [Limb],
1208    ns: &[Limb],
1209    ds: &[Limb],
1210    mut is: &[Limb],
1211    scratch: &mut [Limb],
1212) -> bool {
1213    let n_len = ns.len();
1214    let d_len = ds.len();
1215    assert_eq!(rs.len(), d_len);
1216    let mut i_len = is.len();
1217    let q_len = n_len - d_len;
1218    let qs = &mut qs[..q_len];
1219    let (ns_lo, ns_hi) = ns.split_at(q_len);
1220    let highest_q = limbs_cmp_same_length(ns_hi, ds) >= Equal;
1221    if highest_q {
1222        limbs_sub_same_length_to_out(rs, ns_hi, ds);
1223    } else {
1224        rs.copy_from_slice(ns_hi);
1225    }
1226    let scratch_len = if i_len < MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD {
1227        0
1228    } else {
1229        limbs_mul_mod_base_pow_n_minus_1_next_size(d_len + 1)
1230    };
1231    let mut n = d_len - i_len;
1232    for (ns, qs) in ns_lo.rchunks(i_len).zip(qs.rchunks_mut(i_len)) {
1233        let chunk_len = ns.len();
1234        if i_len != chunk_len {
1235            // last iteration
1236            is = &is[i_len - chunk_len..];
1237            i_len = chunk_len;
1238            n = d_len - i_len;
1239        }
1240        let (rs_lo, rs_hi) = rs.split_at_mut(n);
1241        // Compute the next block of quotient limbs by multiplying the inverse by the upper part of
1242        // the partial remainder.
1243        let mut mul_scratch = vec![0; limbs_mul_same_length_to_out_scratch_len(is.len())];
1244        limbs_mul_same_length_to_out(scratch, rs_hi, is, &mut mul_scratch);
1245        // The inverse's most significant bit is implicit.
1246        assert!(!limbs_add_same_length_to_out(
1247            qs,
1248            &scratch[i_len..i_len << 1],
1249            rs_hi,
1250        ));
1251        // Compute the product of the quotient block and the divisor, to be subtracted from the
1252        // partial remainder combined with new limbs from the dividend. We only really need the low
1253        // d_len + 1 limbs.
1254        if i_len < MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD {
1255            let mut mul_scratch = vec![0; limbs_mul_greater_to_out_scratch_len(ds.len(), qs.len())];
1256            limbs_mul_greater_to_out(scratch, ds, qs, &mut mul_scratch);
1257        } else {
1258            limbs_div_barrett_large_product(scratch, ds, qs, rs_hi, scratch_len, i_len);
1259        }
1260        let mut r = rs_hi[0].wrapping_sub(scratch[d_len]);
1261        // Subtract the product from the partial remainder combined with new limbs from the
1262        // dividend, generating a new partial remainder.
1263        let scratch = &mut scratch[..d_len];
1264        let carry = if n == 0 {
1265            // Get next i_len limbs from n.
1266            limbs_sub_same_length_to_out(rs, ns, scratch)
1267        } else {
1268            let (scratch_lo, scratch_hi) = scratch.split_at_mut(i_len);
1269            // Get next i_len limbs from n.
1270            let carry = limbs_sub_same_length_with_borrow_in_in_place_right(
1271                rs_lo,
1272                scratch_hi,
1273                limbs_sub_same_length_in_place_right(ns, scratch_lo),
1274            );
1275            rs.copy_from_slice(scratch);
1276            carry
1277        };
1278        // Check the remainder and adjust the quotient as needed.
1279        if carry {
1280            r.wrapping_sub_assign(1);
1281        }
1282        while r != 0 {
1283            // We loop 0 times with about 69% probability, 1 time with about 31% probability, and 2
1284            // times with about 0.6% probability, if the inverse is computed as recommended.
1285            assert!(!limbs_slice_add_limb_in_place(qs, 1));
1286            if limbs_sub_same_length_in_place_left(rs, ds) {
1287                r -= 1;
1288            }
1289        }
1290        if limbs_cmp_same_length(rs, ds) >= Equal {
1291            // This is executed with about 76% probability.
1292            assert!(!limbs_slice_add_limb_in_place(qs, 1));
1293            limbs_sub_same_length_in_place_left(rs, ds);
1294        }
1295    }
1296    highest_q
1297}
1298
1299// We distinguish 3 cases:
1300//
1301// - d_len < q_len:              i_len = ceil(q_len / ceil(q_len / d_len))
1302// - d_len / 3 < q_len <= d_len: i_len = ceil(q_len / 2)
1303// - q_len < d_len / 3:          i_len = q_len
1304//
1305// In all cases we have i_len <= d_len.
1306//
1307// # Worst-case complexity
1308// Constant time and additional memory.
1309//
1310// Result is O(`q_len`)
1311//
1312// This is equivalent to `mpn_mu_div_qr_choose_in` from `mpn/generic/mu_div_qr.c`, GMP 6.2.1, where
1313// `k == 0`.
1314pub_const_crate_test! {limbs_div_mod_barrett_is_len(q_len: usize, d_len: usize) -> usize {
1315    let q_len_minus_1 = q_len - 1;
1316    if q_len > d_len {
1317        // Compute an inverse size that is a nice partition of the quotient.
1318        let b = q_len_minus_1 / d_len + 1; // ceil(q_len / d_len), number of blocks
1319        q_len_minus_1 / b + 1 // ceil(q_len / b) = ceil(q_len / ceil(q_len / d_len))
1320    } else if 3 * q_len > d_len {
1321        (q_len_minus_1 >> 1) + 1 // b = 2
1322    } else {
1323        q_len_minus_1 + 1 // b = 1
1324    }
1325}}
1326
1327// # Worst-case complexity
1328// $T(n) = O(n \log n \log\log n)$
1329//
1330// $M(n) = O(n \log n)$
1331//
1332// where $T$ is time, $M$ is additional memory, and $n$ is `ns.len()`.
1333//
1334// This is equivalent to `mpn_mu_div_qr2` from `mpn/generic/mu_div_qr.c`, GMP 6.2.1.
1335pub_crate_test! {limbs_div_mod_barrett_helper(
1336    qs: &mut [Limb],
1337    rs: &mut [Limb],
1338    ns: &[Limb],
1339    ds: &[Limb],
1340    scratch: &mut [Limb],
1341) -> bool {
1342    let n_len = ns.len();
1343    let d_len = ds.len();
1344    assert_eq!(rs.len(), d_len);
1345    assert!(d_len > 1);
1346    assert!(n_len > d_len);
1347    let q_len = n_len - d_len;
1348    // Compute the inverse size.
1349    let i_len = limbs_div_mod_barrett_is_len(q_len, d_len);
1350    assert!(i_len <= d_len);
1351    let i_len_plus_1 = i_len + 1;
1352    let (is, scratch_hi) = scratch.split_at_mut(i_len_plus_1);
1353    // compute an approximate inverse on i_len + 1 limbs
1354    if d_len == i_len {
1355        let (scratch_lo, scratch_hi) = scratch_hi.split_at_mut(i_len_plus_1);
1356        let (scratch_first, scratch_lo_tail) = scratch_lo.split_first_mut().unwrap();
1357        scratch_lo_tail.copy_from_slice(&ds[..i_len]);
1358        *scratch_first = 1;
1359        limbs_invert_approx(is, scratch_lo, scratch_hi);
1360        slice_move_left(is, 1);
1361    } else if limbs_add_limb_to_out(scratch_hi, &ds[d_len - i_len_plus_1..], 1) {
1362        slice_set_zero(&mut is[..i_len]);
1363    } else {
1364        let (scratch_lo, scratch_hi) = scratch_hi.split_at_mut(i_len_plus_1);
1365        limbs_invert_approx(is, scratch_lo, scratch_hi);
1366        slice_move_left(is, 1);
1367    }
1368    let (scratch_lo, scratch_hi) = scratch.split_at_mut(i_len);
1369    limbs_div_mod_barrett_preinverted(qs, rs, ns, ds, scratch_lo, scratch_hi)
1370}}
1371
1372// # Worst-case complexity
1373// Constant time and additional memory.
1374//
1375// Result is O(`d_len`)
1376//
1377// This is equivalent to `mpn_preinv_mu_div_qr_itch` from `mpn/generic/mu_div_qr.c`, GMP 6.2.1, but
1378// `nn` is omitted from the arguments as it is unused.
1379fn limbs_div_mod_barrett_preinverse_scratch_len(d_len: usize, is_len: usize) -> usize {
1380    let itch_local = limbs_mul_mod_base_pow_n_minus_1_next_size(d_len + 1);
1381    let itch_out = limbs_mul_mod_base_pow_n_minus_1_scratch_len(itch_local, d_len, is_len);
1382    itch_local + itch_out
1383}
1384
1385// # Worst-case complexity
1386// Constant time and additional memory.
1387//
1388// This is equivalent to `mpn_invertappr_itch` from `gmp-impl.h`, GMP 6.2.1.
1389pub(crate) const fn limbs_invert_approx_scratch_len(is_len: usize) -> usize {
1390    is_len << 1
1391}
1392
1393// # Worst-case complexity
1394// Constant time and additional memory.
1395//
1396// Result is O(`n_len`)
1397//
1398// This is equivalent to `mpn_mu_div_qr_itch` from `mpn/generic/mu_div_qr.c`, GMP 6.2.1, where
1399// `mua_k == 0`.
1400pub_crate_test! {limbs_div_mod_barrett_scratch_len(n_len: usize, d_len: usize) -> usize {
1401    let is_len = limbs_div_mod_barrett_is_len(n_len - d_len, d_len);
1402    let preinverse_len = limbs_div_mod_barrett_preinverse_scratch_len(d_len, is_len);
1403    // 3 * is_len + 4
1404    let inv_approx_len = limbs_invert_approx_scratch_len(is_len + 1) + is_len + 2;
1405    assert!(preinverse_len >= inv_approx_len);
1406    is_len + preinverse_len
1407}}
1408
1409// # Worst-case complexity
1410// $T(n) = O(n \log n \log\log n)$
1411//
1412// $M(n) = O(n \log n)$
1413//
1414// where $T$ is time, $M$ is additional memory, and $n$ is `ds.len()`.
1415pub_test! {limbs_div_mod_barrett_large_helper(
1416    qs: &mut [Limb],
1417    rs: &mut [Limb],
1418    ns: &[Limb],
1419    ds: &[Limb],
1420    scratch: &mut [Limb],
1421) -> bool {
1422    let n_len = ns.len();
1423    let d_len = ds.len();
1424    let q_len = qs.len();
1425    let q_len_plus_one = q_len + 1;
1426    let n = n_len - q_len - q_len_plus_one; // 2 * d_len - n_len - 1
1427    let (ns_lo, ns_hi) = ns.split_at(n);
1428    let (ds_lo, ds_hi) = ds.split_at(d_len - q_len_plus_one);
1429    let (rs_lo, rs_hi) = rs.split_at_mut(n);
1430    let rs_hi = &mut rs_hi[..q_len_plus_one];
1431    let mut highest_q = limbs_div_mod_barrett_helper(qs, rs_hi, ns_hi, ds_hi, scratch);
1432    // Multiply the quotient by the divisor limbs ignored above. The product is d_len - 1 limbs
1433    // long.
1434    let mut mul_scratch = vec![0; limbs_mul_to_out_scratch_len(ds_lo.len(), qs.len())];
1435    limbs_mul_to_out(scratch, ds_lo, qs, &mut mul_scratch);
1436    let (scratch_last, scratch_init) = scratch[..d_len].split_last_mut().unwrap();
1437    *scratch_last = Limb::from(
1438        highest_q && limbs_slice_add_same_length_in_place_left(&mut scratch_init[q_len..], ds_lo),
1439    );
1440    let (scratch_lo, scratch_hi) = scratch.split_at(n);
1441    let scratch_hi = &scratch_hi[..q_len_plus_one];
1442    if limbs_sub_same_length_with_borrow_in_in_place_left(
1443        rs_hi,
1444        scratch_hi,
1445        limbs_sub_same_length_to_out(rs_lo, ns_lo, scratch_lo),
1446    ) {
1447        if limbs_sub_limb_in_place(qs, 1) {
1448            assert!(highest_q);
1449            highest_q = false;
1450        }
1451        limbs_slice_add_same_length_in_place_left(&mut rs[..d_len], ds);
1452    }
1453    highest_q
1454}}
1455
1456// Block-wise Barrett division. The idea of the algorithm used herein is to compute a smaller
1457// inverted value than used in the standard Barrett algorithm, and thus save time in the Newton
1458// iterations, and pay just a small price when using the inverted value for developing quotient
1459// bits. This algorithm was presented at ICMS 2006.
1460//
1461// `ns` must have length at least 3, `ds` must have length at least 2 and be no longer than `ns`,
1462// and the most significant bit of `ds` must be set.
1463//
1464// # Worst-case complexity
1465// $T(n) = O(n \log n \log\log n)$
1466//
1467// $M(n) = O(n \log n)$
1468//
1469// where $T$ is time, $M$ is additional memory, and $n$ is `ns.len()`.
1470//
1471// # Panics
1472// Panics if `ds` has length smaller than 2, `ns.len()` is less than `ds.len()`, `qs` has length
1473// less than `ns.len()` - `ds.len()`, `scratch` is too short, or the last limb of `ds` does not have
1474// its highest bit set.
1475//
1476// This is equivalent to `mpn_mu_div_qr` from `mpn/generic/mu_div_qr.c`, GMP 6.2.1.
1477pub_test! {limbs_div_mod_barrett(
1478    qs: &mut [Limb],
1479    rs: &mut [Limb],
1480    ns: &[Limb],
1481    ds: &[Limb],
1482    scratch: &mut [Limb],
1483) -> bool {
1484    let n_len = ns.len();
1485    let d_len = ds.len();
1486    let q_len = n_len - d_len;
1487    let qs = &mut qs[..q_len];
1488    // Test whether 2 * d_len - n_len > MU_DIV_QR_SKEW_THRESHOLD
1489    if d_len <= q_len + MU_DIV_QR_SKEW_THRESHOLD {
1490        limbs_div_mod_barrett_helper(qs, &mut rs[..d_len], ns, ds, scratch)
1491    } else {
1492        limbs_div_mod_barrett_large_helper(qs, rs, ns, ds, scratch)
1493    }
1494}}
1495
1496// `ds` must have length 2, `ns` must have length at least 2, `qs` must have length at least
1497// `ns.len() - 2`, `rs` must have length at least 2, and the most-significant limb of `ds` must be
1498// nonzero.
1499//
1500// # Worst-case complexity
1501// $T(n) = O(n)$
1502//
1503// $M(n) = O(n)$
1504//
1505// where $T$ is time, $M$ is additional memory, and $n$ is `ns.len()`.
1506fn limbs_div_mod_by_two_limb(qs: &mut [Limb], rs: &mut [Limb], ns: &[Limb], ds: &[Limb]) {
1507    let n_len = ns.len();
1508    let ds_1 = ds[1];
1509    let bits = LeadingZeros::leading_zeros(ds_1);
1510    if bits == 0 {
1511        let mut ns = ns.to_vec();
1512        // always store n_len - 1 quotient limbs
1513        qs[n_len - 2] = Limb::from(limbs_div_mod_by_two_limb_normalized(qs, &mut ns, ds));
1514        rs[0] = ns[0];
1515        rs[1] = ns[1];
1516    } else {
1517        let ds_0 = ds[0];
1518        let cobits = Limb::WIDTH - bits;
1519        let mut ns_shifted = vec![0; n_len + 1];
1520        let ns_shifted = &mut ns_shifted;
1521        let carry = limbs_shl_to_out(ns_shifted, ns, bits);
1522        let ds_shifted = &mut [ds_0 << bits, (ds_1 << bits) | (ds_0 >> cobits)];
1523        if carry == 0 {
1524            // always store n_len - 1 quotient limbs
1525            qs[n_len - 2] = Limb::from(limbs_div_mod_by_two_limb_normalized(
1526                qs,
1527                &mut ns_shifted[..n_len],
1528                ds_shifted,
1529            ));
1530        } else {
1531            ns_shifted[n_len] = carry;
1532            limbs_div_mod_by_two_limb_normalized(qs, ns_shifted, ds_shifted);
1533        }
1534        let ns_shifted_1 = ns_shifted[1];
1535        rs[0] = (ns_shifted[0] >> bits) | (ns_shifted_1 << cobits);
1536        rs[1] = ns_shifted_1 >> bits;
1537    }
1538}
1539
1540// TODO tune
1541pub(crate) const MUPI_DIV_QR_THRESHOLD: usize = 74;
1542
1543// # Worst-case complexity
1544// Constant time and additional memory.
1545fn limbs_div_mod_dc_condition(n_len: usize, d_len: usize) -> bool {
1546    let n_64 = n_len as f64;
1547    let d_64 = d_len as f64;
1548    d_len < MUPI_DIV_QR_THRESHOLD
1549        || n_len < MU_DIV_QR_THRESHOLD << 1
1550        || fma!(
1551            ((MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD) << 1) as f64,
1552            d_64,
1553            MUPI_DIV_QR_THRESHOLD as f64 * n_64
1554        ) > d_64 * n_64
1555}
1556
1557// This function is optimized for the case when the numerator has at least twice the length of the
1558// denominator.
1559//
1560// `ds` must have length at least 3, `ns` must be at least as long as `ds`, `qs` must have length at
1561// least `ns.len() - ds.len() + 1`, `rs` must have the same length as `ds`, and the most-
1562// significant limb of `ds` must be nonzero.
1563//
1564// # Worst-case complexity
1565// $T(n) = O(n \log n \log\log n)$
1566//
1567// $M(n) = O(n \log n)$
1568//
1569// where $T$ is time, $M$ is additional memory, and $n$ is `ns.len()`.
1570fn limbs_div_mod_unbalanced(
1571    qs: &mut [Limb],
1572    rs: &mut [Limb],
1573    ns: &[Limb],
1574    ds: &[Limb],
1575    adjusted_n_len: usize,
1576) {
1577    let mut n_len = ns.len();
1578    let d_len = ds.len();
1579    qs[n_len - d_len] = 0; // zero high quotient limb
1580    let mut ds_shifted_vec;
1581    let ds_shifted: &[Limb];
1582    let mut ns_shifted_vec = vec![0; n_len + 1];
1583    let ns_shifted = &mut ns_shifted_vec;
1584    let bits = LeadingZeros::leading_zeros(*ds.last().unwrap());
1585    if bits == 0 {
1586        ds_shifted = ds;
1587        ns_shifted[..n_len].copy_from_slice(ns);
1588    } else {
1589        // normalize divisor
1590        ds_shifted_vec = vec![0; d_len];
1591        limbs_shl_to_out(&mut ds_shifted_vec, ds, bits);
1592        ds_shifted = &ds_shifted_vec;
1593        let (ns_shifted_last, ns_shifted_init) = ns_shifted.split_last_mut().unwrap();
1594        *ns_shifted_last = limbs_shl_to_out(ns_shifted_init, ns, bits);
1595    }
1596    n_len = adjusted_n_len;
1597    let d_inv = limbs_two_limb_inverse_helper(ds_shifted[d_len - 1], ds_shifted[d_len - 2]);
1598    let ns_shifted = &mut ns_shifted[..n_len];
1599    if d_len < DC_DIV_QR_THRESHOLD {
1600        limbs_div_mod_schoolbook(qs, ns_shifted, ds_shifted, d_inv);
1601        let ns_shifted = &ns_shifted[..d_len];
1602        if bits == 0 {
1603            rs.copy_from_slice(ns_shifted);
1604        } else {
1605            limbs_shr_to_out(rs, ns_shifted, bits);
1606        }
1607    } else if limbs_div_mod_dc_condition(n_len, d_len) {
1608        limbs_div_mod_divide_and_conquer(qs, ns_shifted, ds_shifted, d_inv);
1609        let ns_shifted = &ns_shifted[..d_len];
1610        if bits == 0 {
1611            rs.copy_from_slice(ns_shifted);
1612        } else {
1613            limbs_shr_to_out(rs, ns_shifted, bits);
1614        }
1615    } else {
1616        let scratch_len = limbs_div_mod_barrett_scratch_len(n_len, d_len);
1617        let mut scratch = vec![0; scratch_len];
1618        limbs_div_mod_barrett(qs, rs, ns_shifted, ds_shifted, &mut scratch);
1619        if bits != 0 {
1620            limbs_slice_shr_in_place(rs, bits);
1621        }
1622    }
1623}
1624
1625// The numerator must have less than twice the length of the denominator.
1626//
1627// Problem:
1628//
1629// Divide a numerator N with `n_len` limbs by a denominator D with `d_len` limbs, forming a quotient
1630// of `q_len` = `n_len` - `d_len` + 1 limbs. When `q_len` is small compared to `d_len`, conventional
1631// division algorithms perform poorly. We want an algorithm that has an expected running time that
1632// is dependent only on `q_len`.
1633//
1634// Algorithm (very informally stated):
1635//
1636// 1) Divide the 2 * `q_len` most significant limbs from the numerator by the `q_len` most-
1637// significant limbs from the denominator. Call the result `qest`. This is either the correct
1638// quotient, or 1 or 2 too large. Compute the remainder from the division.
1639//
1640// 2) Is the most significant limb from the remainder < p, where p is the product of the most-
1641// significant limb from the quotient and the next(d)? (Next(d) denotes the next ignored limb from
1642// the denominator.)  If it is, decrement `qest`, and adjust the remainder accordingly.
1643//
1644// 3) Is the remainder >= `qest`?  If it is, `qest` is the desired quotient. The algorithm
1645// terminates.
1646//
1647// 4) Subtract `qest` * next(d) from the remainder. If there is borrow out, decrement `qest`, and
1648// adjust the remainder accordingly.
1649//
1650// 5) Skip one word from the denominator (i.e., let next(d) denote the next less significant limb).
1651//
1652// `ds` must have length at least 3, `ns` must be at least as long as `ds` but no more than twice as
1653// long, `qs` must have length at least `ns.len() - ds.len() + 1`,`rs` must have the same length as
1654// `ds`, and the most-significant limb of `ds` must be nonzero.
1655//
1656// # Worst-case complexity
1657// $T(n) = O(n \log n \log\log n)$
1658//
1659// $M(n) = O(n \log n)$
1660//
1661// where $T$ is time, $M$ is additional memory, and $n$ is `ns.len()`.
1662pub(crate) fn limbs_div_mod_balanced(
1663    qs: &mut [Limb],
1664    rs: &mut [Limb],
1665    ns: &[Limb],
1666    ds: &[Limb],
1667    adjust: bool,
1668) {
1669    let n_len = ns.len();
1670    let d_len = ds.len();
1671    let mut q_len = n_len - d_len;
1672    assert!(d_len >= q_len);
1673    qs[q_len] = 0; // zero high quotient limb
1674    if adjust {
1675        q_len += 1;
1676    } else if q_len == 0 {
1677        rs.copy_from_slice(&ns[..d_len]);
1678        return;
1679    }
1680    let q_len = q_len;
1681    // `i_len` is the (at least partially) ignored number of limbs.
1682    let i_len = d_len - q_len;
1683    // Normalize the denominator by shifting it to the left such that its most significant bit is
1684    // set. Then shift the numerator the same amount, to mathematically preserve the quotient.
1685    let bits = LeadingZeros::leading_zeros(ds[d_len - 1]);
1686    let cobits = Limb::WIDTH - bits;
1687    let q_len_2 = q_len << 1;
1688    let m = n_len - q_len_2;
1689    let mut ns_shifted_vec = vec![0; q_len_2 + 1];
1690    let mut ds_shifted_vec;
1691    let ds_shifted: &[Limb];
1692    let ds_hi = &ds[i_len..];
1693    let ds_lo_last = ds[i_len - 1];
1694    let carry = if bits == 0 {
1695        ds_shifted = ds_hi;
1696        ns_shifted_vec[..q_len_2].copy_from_slice(&ns[m..]);
1697        0
1698    } else {
1699        ds_shifted_vec = vec![0; q_len];
1700        limbs_shl_to_out(&mut ds_shifted_vec, ds_hi, bits);
1701        ds_shifted_vec[0] |= ds_lo_last >> cobits;
1702        ds_shifted = &ds_shifted_vec;
1703        let carry = limbs_shl_to_out(&mut ns_shifted_vec, &ns[m..], bits);
1704        if !adjust {
1705            ns_shifted_vec[0] |= ns[m - 1] >> cobits;
1706        }
1707        carry
1708    };
1709    let ns_shifted = if adjust {
1710        ns_shifted_vec[q_len_2] = carry;
1711        &mut ns_shifted_vec[1..]
1712    } else {
1713        &mut ns_shifted_vec
1714    };
1715    // Get an approximate quotient using the extracted operands.
1716    if q_len == 1 {
1717        (qs[0], ns_shifted[0]) =
1718            Limb::xx_div_mod_y_to_qr(ns_shifted[1], ns_shifted[0], ds_shifted[0]);
1719    } else if q_len == 2 {
1720        limbs_div_mod_by_two_limb_normalized(qs, ns_shifted, ds_shifted);
1721    } else {
1722        let ns_shifted = &mut ns_shifted[..q_len_2];
1723        let d_inv = limbs_two_limb_inverse_helper(ds_shifted[q_len - 1], ds_shifted[q_len - 2]);
1724        if q_len < DC_DIV_QR_THRESHOLD {
1725            limbs_div_mod_schoolbook(qs, ns_shifted, ds_shifted, d_inv);
1726        } else if q_len < MU_DIV_QR_THRESHOLD {
1727            limbs_div_mod_divide_and_conquer(qs, ns_shifted, ds_shifted, d_inv);
1728        } else {
1729            let mut scratch = vec![0; limbs_div_mod_barrett_scratch_len(q_len_2, q_len)];
1730            limbs_div_mod_barrett(qs, rs, ns_shifted, ds_shifted, &mut scratch);
1731            ns_shifted[..q_len].copy_from_slice(&rs[..q_len]);
1732        }
1733    }
1734    // Multiply the first ignored divisor limb by the most significant quotient limb. If that
1735    // product is > the partial remainder's most significant limb, we know the quotient is too
1736    // large. This test quickly catches most cases where the quotient is too large; it catches all
1737    // cases where the quotient is 2 too large.
1738    let mut r_len = q_len;
1739    let mut x = ds_lo_last << bits;
1740    if i_len >= 2 {
1741        x |= ds[i_len - 2] >> 1 >> (!bits & Limb::WIDTH_MASK);
1742    }
1743    if ns_shifted[q_len - 1] < (DoubleLimb::from(x) * DoubleLimb::from(qs[q_len - 1])).upper_half()
1744    {
1745        assert!(!limbs_sub_limb_in_place(qs, 1));
1746        let carry = limbs_slice_add_same_length_in_place_left(&mut ns_shifted[..q_len], ds_shifted);
1747        if carry {
1748            // The partial remainder is safely large.
1749            ns_shifted[q_len] = 1;
1750            r_len += 1;
1751        }
1752    }
1753    let mut q_too_large = false;
1754    let mut do_extra_cleanup = true;
1755    let mut scratch = vec![0; d_len];
1756    let mut i_len_alt = i_len;
1757    let qs_lo = &mut qs[..q_len];
1758    if bits != 0 {
1759        // Append the partially used numerator limb to the partial remainder.
1760        let carry_1 = limbs_slice_shl_in_place(&mut ns_shifted[..r_len], cobits);
1761        let mask = Limb::MAX >> bits;
1762        ns_shifted[0] |= ns[i_len - 1] & mask;
1763        // Update partial remainder with partially used divisor limb.
1764        let (ns_shifted_last, ns_shifted_init) = ns_shifted[..=q_len].split_last_mut().unwrap();
1765        let carry_2 = limbs_sub_mul_limb_same_length_in_place_left(
1766            ns_shifted_init,
1767            qs_lo,
1768            ds[i_len - 1] & mask,
1769        );
1770        if q_len == r_len {
1771            (*ns_shifted_last, q_too_large) = carry_1.overflowing_sub(carry_2);
1772            r_len += 1;
1773        } else {
1774            assert!(*ns_shifted_last >= carry_2);
1775            ns_shifted_last.wrapping_sub_assign(carry_2);
1776        }
1777        i_len_alt -= 1;
1778    }
1779    // True: partial remainder now is neutral, i.e., it is not shifted up.
1780    if i_len_alt == 0 {
1781        rs.copy_from_slice(&ns_shifted[..r_len]);
1782        do_extra_cleanup = false;
1783    } else {
1784        let mut mul_scratch = vec![0; limbs_mul_to_out_scratch_len(qs_lo.len(), i_len_alt)];
1785        limbs_mul_to_out(&mut scratch, qs_lo, &ds[..i_len_alt], &mut mul_scratch);
1786    }
1787    if do_extra_cleanup {
1788        let (scratch_lo, scratch_hi) = scratch.split_at_mut(i_len_alt);
1789        q_too_large |=
1790            limbs_sub_greater_in_place_left(&mut ns_shifted[..r_len], &scratch_hi[..q_len]);
1791        let (rs_lo, rs_hi) = rs.split_at_mut(i_len_alt);
1792        let rs_hi_len = rs_hi.len();
1793        rs_hi.copy_from_slice(&ns_shifted[..rs_hi_len]);
1794        q_too_large |= limbs_sub_same_length_to_out(rs_lo, &ns[..i_len_alt], scratch_lo)
1795            && limbs_sub_limb_in_place(&mut rs_hi[..min(rs_hi_len, r_len)], 1);
1796    }
1797    if q_too_large {
1798        assert!(!limbs_sub_limb_in_place(qs, 1));
1799        limbs_slice_add_same_length_in_place_left(rs, ds);
1800    }
1801}
1802
1803// Interpreting two slices of `Limb`s, `ns` and `ds`, as the limbs (in ascending order) of two
1804// `Natural`s, divides them, returning the quotient and remainder. The quotient has `ns.len() -
1805// ds.len() + 1` limbs and the remainder `ds.len()` limbs.
1806//
1807// `ns` must be at least as long as `ds` and `ds` must have length at least 2 and its most
1808// significant limb must be greater than zero.
1809//
1810// # Worst-case complexity
1811// $T(n) = O(n \log n \log\log n)$
1812//
1813// $M(n) = O(n \log n)$
1814//
1815// where $T$ is time, $M$ is additional memory, and $n$ is `ns.len()`.
1816//
1817// # Panics
1818// Panics if `ns` is shorter than `ds`, `ds` has length less than 2, or the most-significant limb of
1819// `ds` is zero.
1820//
1821// This is equivalent to `mpn_tdiv_qr` from `mpn/generic/tdiv_qr.c`, GMP 6.2.1, where `dn > 1` and
1822// `qp` and `rp` are returned.
1823pub_test! {limbs_div_mod(ns: &[Limb], ds: &[Limb]) -> (Vec<Limb>, Vec<Limb>) {
1824    let d_len = ds.len();
1825    let mut qs = vec![0; ns.len() - d_len + 1];
1826    let mut rs = vec![0; d_len];
1827    limbs_div_mod_to_out(&mut qs, &mut rs, ns, ds);
1828    (qs, rs)
1829}}
1830
1831// Interpreting two slices of `Limb`s, `ns` and `ds`, as the limbs (in ascending order) of two
1832// `Natural`s, divides them, writing the `ns.len() - ds.len() + 1` limbs of the quotient to `qs` and
1833// the `ds.len()` limbs of the remainder to `rs`.
1834//
1835// `ns` must be at least as long as `ds`, `qs` must have length at least `ns.len() - ds.len() + 1`,
1836// `rs` must be at least as long as `ds`, and `ds` must have length at least 2 and its most
1837// significant limb must be greater than zero.
1838//
1839// # Worst-case complexity
1840// $T(n) = O(n \log n \log\log n)$
1841//
1842// $M(n) = O(n \log n)$
1843//
1844// where $T$ is time, $M$ is additional memory, and $n$ is `ns.len()`.
1845//
1846// # Panics
1847// Panics if `qs` or `rs` are too short, `ns` is shorter than `ds`, `ds` has length less than 2, or
1848// the most-significant limb of `ds` is zero.
1849//
1850// This is equivalent to `mpn_tdiv_qr` from `mpn/generic/tdiv_qr.c`, GMP 6.2.1, where `dn > 1`.
1851pub_crate_test! {limbs_div_mod_to_out(qs: &mut [Limb], rs: &mut [Limb], ns: &[Limb], ds: &[Limb]) {
1852    let n_len = ns.len();
1853    let d_len = ds.len();
1854    assert!(d_len > 1);
1855    assert!(n_len >= d_len);
1856    assert!(qs.len() > n_len - d_len);
1857    let rs = &mut rs[..d_len];
1858    let ds_last = *ds.last().unwrap();
1859    assert!(ds_last != 0);
1860    if d_len == 2 {
1861        limbs_div_mod_by_two_limb(qs, rs, ns, ds);
1862    } else {
1863        // conservative tests for quotient size
1864        let adjust = ns[n_len - 1] >= ds_last;
1865        let adjusted_n_len = if adjust { n_len + 1 } else { n_len };
1866        if adjusted_n_len < d_len << 1 {
1867            limbs_div_mod_balanced(qs, rs, ns, ds, adjust);
1868        } else {
1869            limbs_div_mod_unbalanced(qs, rs, ns, ds, adjusted_n_len);
1870        }
1871    }
1872}}
1873
1874// TODO improve!
1875//
1876// # Worst-case complexity
1877// $T(n) = O(n \log n \log\log n)$
1878//
1879// $M(n) = O(n \log n)$
1880//
1881// where $T$ is time, $M$ is additional memory, and $n$ is `ns.len()`.
1882pub_crate_test! {limbs_div_mod_qs_to_out_rs_to_ns(qs: &mut [Limb], ns: &mut [Limb], ds: &[Limb]) {
1883    let ns_copy = ns.to_vec();
1884    limbs_div_mod_to_out(qs, ns, &ns_copy, ds);
1885}}
1886
1887impl Natural {
1888    fn div_mod_limb_ref(&self, other: Limb) -> (Self, Limb) {
1889        match (self, other) {
1890            (_, 0) => panic!("division by zero"),
1891            (n, 1) => (n.clone(), 0),
1892            (Self(Small(small)), other) => {
1893                let (q, r) = small.div_rem(other);
1894                (Self(Small(q)), r)
1895            }
1896            (Self(Large(limbs)), other) => {
1897                let (qs, r) = limbs_div_limb_mod(limbs, other);
1898                (Self::from_owned_limbs_asc(qs), r)
1899            }
1900        }
1901    }
1902
1903    pub_test! {div_assign_mod_limb(&mut self, other: Limb) -> Limb {
1904        match (&mut *self, other) {
1905            (_, 0) => panic!("division by zero"),
1906            (_, 1) => 0,
1907            (Natural(Small(small)), other) => small.div_assign_rem(other),
1908            (Natural(Large(limbs)), other) => {
1909                let r = limbs_div_limb_in_place_mod(limbs, other);
1910                self.trim();
1911                r
1912            }
1913        }
1914    }}
1915}
1916
1917impl DivMod<Self> for Natural {
1918    type DivOutput = Self;
1919    type ModOutput = Self;
1920
1921    /// Divides a [`Natural`] by another [`Natural`], taking both by value and returning the
1922    /// quotient and remainder. The quotient is rounded towards negative infinity.
1923    ///
1924    /// The quotient and remainder satisfy $x = qy + r$ and $0 \leq r < y$.
1925    ///
1926    /// $$
1927    /// f(x, y) = \left ( \left \lfloor \frac{x}{y} \right \rfloor, \space
1928    /// x - y\left \lfloor \frac{x}{y} \right \rfloor \right ).
1929    /// $$
1930    ///
1931    /// # Worst-case complexity
1932    /// $T(n) = O(n \log n \log\log n)$
1933    ///
1934    /// $M(n) = O(n \log n)$
1935    ///
1936    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
1937    ///
1938    /// # Panics
1939    /// Panics if `other` is zero.
1940    ///
1941    /// # Examples
1942    /// ```
1943    /// use core::str::FromStr;
1944    /// use malachite_base::num::arithmetic::traits::DivMod;
1945    /// use malachite_base::strings::ToDebugString;
1946    /// use malachite_nz::natural::Natural;
1947    ///
1948    /// // 2 * 10 + 3 = 23
1949    /// assert_eq!(
1950    ///     Natural::from(23u32)
1951    ///         .div_mod(Natural::from(10u32))
1952    ///         .to_debug_string(),
1953    ///     "(2, 3)"
1954    /// );
1955    ///
1956    /// // 810000006723 * 1234567890987 + 530068894399 = 1000000000000000000000000
1957    /// assert_eq!(
1958    ///     Natural::from_str("1000000000000000000000000")
1959    ///         .unwrap()
1960    ///         .div_mod(Natural::from_str("1234567890987").unwrap())
1961    ///         .to_debug_string(),
1962    ///     "(810000006723, 530068894399)"
1963    /// );
1964    /// ```
1965    #[inline]
1966    fn div_mod(mut self, other: Self) -> (Self, Self) {
1967        let r = self.div_assign_mod(other);
1968        (self, r)
1969    }
1970}
1971
1972impl<'a> DivMod<&'a Self> for Natural {
1973    type DivOutput = Self;
1974    type ModOutput = Self;
1975
1976    /// Divides a [`Natural`] by another [`Natural`], taking the first by value and the second by
1977    /// reference and returning the quotient and remainder. The quotient is rounded towards negative
1978    /// infinity.
1979    ///
1980    /// The quotient and remainder satisfy $x = qy + r$ and $0 \leq r < y$.
1981    ///
1982    /// $$
1983    /// f(x, y) = \left ( \left \lfloor \frac{x}{y} \right \rfloor, \space
1984    /// x - y\left \lfloor \frac{x}{y} \right \rfloor \right ).
1985    /// $$
1986    ///
1987    /// # Worst-case complexity
1988    /// $T(n) = O(n \log n \log\log n)$
1989    ///
1990    /// $M(n) = O(n \log n)$
1991    ///
1992    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
1993    ///
1994    /// # Panics
1995    /// Panics if `other` is zero.
1996    ///
1997    /// # Examples
1998    /// ```
1999    /// use core::str::FromStr;
2000    /// use malachite_base::num::arithmetic::traits::DivMod;
2001    /// use malachite_base::strings::ToDebugString;
2002    /// use malachite_nz::natural::Natural;
2003    ///
2004    /// // 2 * 10 + 3 = 23
2005    /// assert_eq!(
2006    ///     Natural::from(23u32)
2007    ///         .div_mod(&Natural::from(10u32))
2008    ///         .to_debug_string(),
2009    ///     "(2, 3)"
2010    /// );
2011    ///
2012    /// // 810000006723 * 1234567890987 + 530068894399 = 1000000000000000000000000
2013    /// assert_eq!(
2014    ///     Natural::from_str("1000000000000000000000000")
2015    ///         .unwrap()
2016    ///         .div_mod(&Natural::from_str("1234567890987").unwrap())
2017    ///         .to_debug_string(),
2018    ///     "(810000006723, 530068894399)"
2019    /// );
2020    /// ```
2021    #[inline]
2022    fn div_mod(mut self, other: &'a Self) -> (Self, Self) {
2023        let r = self.div_assign_mod(other);
2024        (self, r)
2025    }
2026}
2027
2028impl DivMod<Natural> for &Natural {
2029    type DivOutput = Natural;
2030    type ModOutput = Natural;
2031
2032    /// Divides a [`Natural`] by another [`Natural`], taking the first by reference and the second
2033    /// by value and returning the quotient and remainder. The quotient is rounded towards negative
2034    /// infinity.
2035    ///
2036    /// The quotient and remainder satisfy $x = qy + r$ and $0 \leq r < y$.
2037    ///
2038    /// $$
2039    /// f(x, y) = \left ( \left \lfloor \frac{x}{y} \right \rfloor, \space
2040    /// x - y\left \lfloor \frac{x}{y} \right \rfloor \right ).
2041    /// $$
2042    ///
2043    /// # Worst-case complexity
2044    /// $T(n) = O(n \log n \log\log n)$
2045    ///
2046    /// $M(n) = O(n \log n)$
2047    ///
2048    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2049    ///
2050    /// # Panics
2051    /// Panics if `other` is zero.
2052    ///
2053    /// # Examples
2054    /// ```
2055    /// use core::str::FromStr;
2056    /// use malachite_base::num::arithmetic::traits::DivMod;
2057    /// use malachite_base::strings::ToDebugString;
2058    /// use malachite_nz::natural::Natural;
2059    ///
2060    /// // 2 * 10 + 3 = 23
2061    /// assert_eq!(
2062    ///     (&Natural::from(23u32))
2063    ///         .div_mod(Natural::from(10u32))
2064    ///         .to_debug_string(),
2065    ///     "(2, 3)"
2066    /// );
2067    ///
2068    /// // 810000006723 * 1234567890987 + 530068894399 = 1000000000000000000000000
2069    /// assert_eq!(
2070    ///     (&Natural::from_str("1000000000000000000000000").unwrap())
2071    ///         .div_mod(Natural::from_str("1234567890987").unwrap())
2072    ///         .to_debug_string(),
2073    ///     "(810000006723, 530068894399)"
2074    /// );
2075    /// ```
2076    fn div_mod(self, mut other: Natural) -> (Natural, Natural) {
2077        if *self == other {
2078            return (Natural::ONE, Natural::ZERO);
2079        }
2080        match (self, &mut other) {
2081            (_, &mut Natural::ZERO) => panic!("division by zero"),
2082            (n, &mut Natural::ONE) => (n.clone(), Natural::ZERO),
2083            (n, &mut Natural(Small(d))) => {
2084                let (q, r) = n.div_mod_limb_ref(d);
2085                (q, Natural(Small(r)))
2086            }
2087            (Natural(Small(_)), _) => (Natural::ZERO, self.clone()),
2088            (Natural(Large(ns)), Natural(Large(ds))) => {
2089                if ns.len() < ds.len() {
2090                    (Natural::ZERO, self.clone())
2091                } else {
2092                    let (qs, mut rs) = limbs_div_mod(ns, ds);
2093                    swap(&mut rs, ds);
2094                    other.trim();
2095                    (Natural::from_owned_limbs_asc(qs), other)
2096                }
2097            }
2098        }
2099    }
2100}
2101
2102impl DivMod<&Natural> for &Natural {
2103    type DivOutput = Natural;
2104    type ModOutput = Natural;
2105
2106    /// Divides a [`Natural`] by another [`Natural`], taking both by reference and returning the
2107    /// quotient and remainder. The quotient is rounded towards negative infinity.
2108    ///
2109    /// The quotient and remainder satisfy $x = qy + r$ and $0 \leq r < y$.
2110    ///
2111    /// $$
2112    /// f(x, y) = \left ( \left \lfloor \frac{x}{y} \right \rfloor, \space
2113    /// x - y\left \lfloor \frac{x}{y} \right \rfloor \right ).
2114    /// $$
2115    ///
2116    /// # Worst-case complexity
2117    /// $T(n) = O(n \log n \log\log n)$
2118    ///
2119    /// $M(n) = O(n \log n)$
2120    ///
2121    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2122    ///
2123    /// # Panics
2124    /// Panics if `other` is zero.
2125    ///
2126    /// # Examples
2127    /// ```
2128    /// use core::str::FromStr;
2129    /// use malachite_base::num::arithmetic::traits::DivMod;
2130    /// use malachite_base::strings::ToDebugString;
2131    /// use malachite_nz::natural::Natural;
2132    ///
2133    /// // 2 * 10 + 3 = 23
2134    /// assert_eq!(
2135    ///     (&Natural::from(23u32))
2136    ///         .div_mod(&Natural::from(10u32))
2137    ///         .to_debug_string(),
2138    ///     "(2, 3)"
2139    /// );
2140    ///
2141    /// // 810000006723 * 1234567890987 + 530068894399 = 1000000000000000000000000
2142    /// assert_eq!(
2143    ///     (&Natural::from_str("1000000000000000000000000").unwrap())
2144    ///         .div_mod(&Natural::from_str("1234567890987").unwrap())
2145    ///         .to_debug_string(),
2146    ///     "(810000006723, 530068894399)"
2147    /// );
2148    /// ```
2149    fn div_mod(self, other: &Natural) -> (Natural, Natural) {
2150        if self == other {
2151            return (Natural::ONE, Natural::ZERO);
2152        }
2153        match (self, other) {
2154            (_, &Natural::ZERO) => panic!("division by zero"),
2155            (n, &Natural::ONE) => (n.clone(), Natural::ZERO),
2156            (n, Natural(Small(d))) => {
2157                let (q, r) = n.div_mod_limb_ref(*d);
2158                (q, Natural(Small(r)))
2159            }
2160            (Natural(Small(_)), _) => (Natural::ZERO, self.clone()),
2161            (Natural(Large(ns)), Natural(Large(ds))) => {
2162                if ns.len() < ds.len() {
2163                    (Natural::ZERO, self.clone())
2164                } else {
2165                    let (qs, rs) = limbs_div_mod(ns, ds);
2166                    (
2167                        Natural::from_owned_limbs_asc(qs),
2168                        Natural::from_owned_limbs_asc(rs),
2169                    )
2170                }
2171            }
2172        }
2173    }
2174}
2175
2176impl DivAssignMod<Self> for Natural {
2177    type ModOutput = Self;
2178
2179    /// Divides a [`Natural`] by another [`Natural`] in place, taking the [`Natural`] on the
2180    /// right-hand side by value and returning the remainder. The quotient is rounded towards
2181    /// negative infinity.
2182    ///
2183    /// The quotient and remainder satisfy $x = qy + r$ and $0 \leq r < y$.
2184    ///
2185    /// $$
2186    /// f(x, y) = x - y\left \lfloor \frac{x}{y} \right \rfloor,
2187    /// $$
2188    /// $$
2189    /// x \gets \left \lfloor \frac{x}{y} \right \rfloor.
2190    /// $$
2191    ///
2192    /// # Worst-case complexity
2193    /// $T(n) = O(n \log n \log\log n)$
2194    ///
2195    /// $M(n) = O(n \log n)$
2196    ///
2197    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2198    ///
2199    /// # Panics
2200    /// Panics if `other` is zero.
2201    ///
2202    /// # Examples
2203    /// ```
2204    /// use core::str::FromStr;
2205    /// use malachite_base::num::arithmetic::traits::DivAssignMod;
2206    /// use malachite_nz::natural::Natural;
2207    ///
2208    /// // 2 * 10 + 3 = 23
2209    /// let mut x = Natural::from(23u32);
2210    /// assert_eq!(x.div_assign_mod(Natural::from(10u32)), 3);
2211    /// assert_eq!(x, 2);
2212    ///
2213    /// // 810000006723 * 1234567890987 + 530068894399 = 1000000000000000000000000
2214    /// let mut x = Natural::from_str("1000000000000000000000000").unwrap();
2215    /// assert_eq!(
2216    ///     x.div_assign_mod(Natural::from_str("1234567890987").unwrap()),
2217    ///     530068894399u64
2218    /// );
2219    /// assert_eq!(x, 810000006723u64);
2220    /// ```
2221    fn div_assign_mod(&mut self, mut other: Self) -> Self {
2222        if *self == other {
2223            *self = Self::ONE;
2224            return Self::ZERO;
2225        }
2226        match (&mut *self, &mut other) {
2227            (_, &mut Self::ZERO) => panic!("division by zero"),
2228            (_, &mut Self::ONE) => Self::ZERO,
2229            (n, &mut Self(Small(d))) => Self(Small(n.div_assign_mod_limb(d))),
2230            (Self(Small(_)), _) => {
2231                let mut r = Self::ZERO;
2232                swap(self, &mut r);
2233                r
2234            }
2235            (Self(Large(ns)), Self(Large(ds))) => {
2236                if ns.len() < ds.len() {
2237                    let mut r = Self::ZERO;
2238                    swap(self, &mut r);
2239                    r
2240                } else {
2241                    let (mut qs, mut rs) = limbs_div_mod(ns, ds);
2242                    swap(&mut qs, ns);
2243                    swap(&mut rs, ds);
2244                    self.trim();
2245                    other.trim();
2246                    other
2247                }
2248            }
2249        }
2250    }
2251}
2252
2253impl<'a> DivAssignMod<&'a Self> for Natural {
2254    type ModOutput = Self;
2255
2256    /// Divides a [`Natural`] by another [`Natural`] in place, taking the [`Natural`] on the
2257    /// right-hand side by value and returning the remainder. The quotient is rounded towards
2258    /// negative infinity.
2259    ///
2260    /// The quotient and remainder satisfy $x = qy + r$ and $0 \leq r < y$.
2261    ///
2262    /// $$
2263    /// f(x, y) = x - y\left \lfloor \frac{x}{y} \right \rfloor,
2264    /// $$
2265    /// $$
2266    /// x \gets \left \lfloor \frac{x}{y} \right \rfloor.
2267    /// $$
2268    ///
2269    /// # Worst-case complexity
2270    /// $T(n) = O(n \log n \log\log n)$
2271    ///
2272    /// $M(n) = O(n \log n)$
2273    ///
2274    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2275    ///
2276    /// # Panics
2277    /// Panics if `other` is zero.
2278    ///
2279    /// # Examples
2280    /// ```
2281    /// use core::str::FromStr;
2282    /// use malachite_base::num::arithmetic::traits::DivAssignMod;
2283    /// use malachite_nz::natural::Natural;
2284    ///
2285    /// // 2 * 10 + 3 = 23
2286    /// let mut x = Natural::from(23u32);
2287    /// assert_eq!(x.div_assign_mod(&Natural::from(10u32)), 3);
2288    /// assert_eq!(x, 2);
2289    ///
2290    /// // 810000006723 * 1234567890987 + 530068894399 = 1000000000000000000000000
2291    /// let mut x = Natural::from_str("1000000000000000000000000").unwrap();
2292    /// assert_eq!(
2293    ///     x.div_assign_mod(&Natural::from_str("1234567890987").unwrap()),
2294    ///     530068894399u64
2295    /// );
2296    /// assert_eq!(x, 810000006723u64);
2297    /// ```
2298    fn div_assign_mod(&mut self, other: &'a Self) -> Self {
2299        if self == other {
2300            *self = Self::ONE;
2301            return Self::ZERO;
2302        }
2303        match (&mut *self, other) {
2304            (_, &Self::ZERO) => panic!("division by zero"),
2305            (_, &Self::ONE) => Self::ZERO,
2306            (_, Self(Small(d))) => Self(Small(self.div_assign_mod_limb(*d))),
2307            (Self(Small(_)), _) => {
2308                let mut r = Self::ZERO;
2309                swap(self, &mut r);
2310                r
2311            }
2312            (Self(Large(ns)), Self(Large(ds))) => {
2313                if ns.len() < ds.len() {
2314                    let mut r = Self::ZERO;
2315                    swap(self, &mut r);
2316                    r
2317                } else {
2318                    let (mut qs, rs) = limbs_div_mod(ns, ds);
2319                    swap(&mut qs, ns);
2320                    self.trim();
2321                    Self::from_owned_limbs_asc(rs)
2322                }
2323            }
2324        }
2325    }
2326}
2327
2328impl DivRem<Self> for Natural {
2329    type DivOutput = Self;
2330    type RemOutput = Self;
2331
2332    /// Divides a [`Natural`] by another [`Natural`], taking both by value and returning the
2333    /// quotient and remainder. The quotient is rounded towards zero.
2334    ///
2335    /// The quotient and remainder satisfy $x = qy + r$ and $0 \leq r < y$.
2336    ///
2337    /// $$
2338    /// f(x, y) = \left ( \left \lfloor \frac{x}{y} \right \rfloor, \space
2339    /// x - y\left \lfloor \frac{x}{y} \right \rfloor \right ).
2340    /// $$
2341    ///
2342    /// For [`Natural`]s, `div_rem` is equivalent to
2343    /// [`div_mod`](malachite_base::num::arithmetic::traits::DivMod::div_mod).
2344    ///
2345    /// # Worst-case complexity
2346    /// $T(n) = O(n \log n \log\log n)$
2347    ///
2348    /// $M(n) = O(n \log n)$
2349    ///
2350    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2351    ///
2352    /// # Panics
2353    /// Panics if `other` is zero.
2354    ///
2355    /// # Examples
2356    /// ```
2357    /// use core::str::FromStr;
2358    /// use malachite_base::num::arithmetic::traits::DivRem;
2359    /// use malachite_base::strings::ToDebugString;
2360    /// use malachite_nz::natural::Natural;
2361    ///
2362    /// // 2 * 10 + 3 = 23
2363    /// assert_eq!(
2364    ///     Natural::from(23u32)
2365    ///         .div_rem(Natural::from(10u32))
2366    ///         .to_debug_string(),
2367    ///     "(2, 3)"
2368    /// );
2369    ///
2370    /// // 810000006723 * 1234567890987 + 530068894399 = 1000000000000000000000000
2371    /// assert_eq!(
2372    ///     Natural::from_str("1000000000000000000000000")
2373    ///         .unwrap()
2374    ///         .div_rem(Natural::from_str("1234567890987").unwrap())
2375    ///         .to_debug_string(),
2376    ///     "(810000006723, 530068894399)"
2377    /// );
2378    /// ```
2379    #[inline]
2380    fn div_rem(self, other: Self) -> (Self, Self) {
2381        self.div_mod(other)
2382    }
2383}
2384
2385impl<'a> DivRem<&'a Self> for Natural {
2386    type DivOutput = Self;
2387    type RemOutput = Self;
2388
2389    /// Divides a [`Natural`] by another [`Natural`], taking the first by value and the second by
2390    /// reference and returning the quotient and remainder. The quotient is rounded towards zero.
2391    ///
2392    /// The quotient and remainder satisfy $x = qy + r$ and $0 \leq r < y$.
2393    ///
2394    /// $$
2395    /// f(x, y) = \left ( \left \lfloor \frac{x}{y} \right \rfloor, \space
2396    /// x - y\left \lfloor \frac{x}{y} \right \rfloor \right ).
2397    /// $$
2398    ///
2399    /// For [`Natural`]s, `div_rem` is equivalent to
2400    /// [`div_mod`](malachite_base::num::arithmetic::traits::DivMod::div_mod).
2401    ///
2402    /// # Worst-case complexity
2403    /// $T(n) = O(n \log n \log\log n)$
2404    ///
2405    /// $M(n) = O(n \log n)$
2406    ///
2407    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2408    ///
2409    /// # Panics
2410    /// Panics if `other` is zero.
2411    ///
2412    /// # Examples
2413    /// ```
2414    /// use core::str::FromStr;
2415    /// use malachite_base::num::arithmetic::traits::DivRem;
2416    /// use malachite_base::strings::ToDebugString;
2417    /// use malachite_nz::natural::Natural;
2418    ///
2419    /// // 2 * 10 + 3 = 23
2420    /// assert_eq!(
2421    ///     Natural::from(23u32)
2422    ///         .div_rem(&Natural::from(10u32))
2423    ///         .to_debug_string(),
2424    ///     "(2, 3)"
2425    /// );
2426    ///
2427    /// // 810000006723 * 1234567890987 + 530068894399 = 1000000000000000000000000
2428    /// assert_eq!(
2429    ///     Natural::from_str("1000000000000000000000000")
2430    ///         .unwrap()
2431    ///         .div_rem(&Natural::from_str("1234567890987").unwrap())
2432    ///         .to_debug_string(),
2433    ///     "(810000006723, 530068894399)"
2434    /// );
2435    /// ```
2436    #[inline]
2437    fn div_rem(self, other: &'a Self) -> (Self, Self) {
2438        self.div_mod(other)
2439    }
2440}
2441
2442impl DivRem<Natural> for &Natural {
2443    type DivOutput = Natural;
2444    type RemOutput = Natural;
2445
2446    /// Divides a [`Natural`] by another [`Natural`], taking the first by reference and the second
2447    /// by value and returning the quotient and remainder. The quotient is rounded towards zero.
2448    ///
2449    /// The quotient and remainder satisfy $x = qy + r$ and $0 \leq r < y$.
2450    ///
2451    /// $$
2452    /// f(x, y) = \left ( \left \lfloor \frac{x}{y} \right \rfloor, \space
2453    /// x - y\left \lfloor \frac{x}{y} \right \rfloor \right ).
2454    /// $$
2455    ///
2456    /// For [`Natural`]s, `div_rem` is equivalent to
2457    /// [`div_mod`](malachite_base::num::arithmetic::traits::DivMod::div_mod).
2458    ///
2459    /// # Worst-case complexity
2460    /// $T(n) = O(n \log n \log\log n)$
2461    ///
2462    /// $M(n) = O(n \log n)$
2463    ///
2464    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2465    ///
2466    /// # Panics
2467    /// Panics if `other` is zero.
2468    ///
2469    /// # Examples
2470    /// ```
2471    /// use core::str::FromStr;
2472    /// use malachite_base::num::arithmetic::traits::DivRem;
2473    /// use malachite_base::strings::ToDebugString;
2474    /// use malachite_nz::natural::Natural;
2475    ///
2476    /// // 2 * 10 + 3 = 23
2477    /// assert_eq!(
2478    ///     (&Natural::from(23u32))
2479    ///         .div_rem(Natural::from(10u32))
2480    ///         .to_debug_string(),
2481    ///     "(2, 3)"
2482    /// );
2483    ///
2484    /// // 810000006723 * 1234567890987 + 530068894399 = 1000000000000000000000000
2485    /// assert_eq!(
2486    ///     (&Natural::from_str("1000000000000000000000000").unwrap())
2487    ///         .div_rem(Natural::from_str("1234567890987").unwrap())
2488    ///         .to_debug_string(),
2489    ///     "(810000006723, 530068894399)"
2490    /// );
2491    /// ```
2492    #[inline]
2493    fn div_rem(self, other: Natural) -> (Natural, Natural) {
2494        self.div_mod(other)
2495    }
2496}
2497
2498impl DivRem<&Natural> for &Natural {
2499    type DivOutput = Natural;
2500    type RemOutput = Natural;
2501
2502    /// Divides a [`Natural`] by another [`Natural`], taking both by reference and returning the
2503    /// quotient and remainder. The quotient is rounded towards zero.
2504    ///
2505    /// The quotient and remainder satisfy $x = qy + r$ and $0 \leq r < y$.
2506    ///
2507    /// $$
2508    /// f(x, y) = \left ( \left \lfloor \frac{x}{y} \right \rfloor, \space
2509    /// x - y\left \lfloor \frac{x}{y} \right \rfloor \right ).
2510    /// $$
2511    ///
2512    /// For [`Natural`]s, `div_rem` is equivalent to
2513    /// [`div_mod`](malachite_base::num::arithmetic::traits::DivMod::div_mod).
2514    ///
2515    /// # Worst-case complexity
2516    /// $T(n) = O(n \log n \log\log n)$
2517    ///
2518    /// $M(n) = O(n \log n)$
2519    ///
2520    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2521    ///
2522    /// # Panics
2523    /// Panics if `other` is zero.
2524    ///
2525    /// # Examples
2526    /// ```
2527    /// use core::str::FromStr;
2528    /// use malachite_base::num::arithmetic::traits::DivRem;
2529    /// use malachite_base::strings::ToDebugString;
2530    /// use malachite_nz::natural::Natural;
2531    ///
2532    /// // 2 * 10 + 3 = 23
2533    /// assert_eq!(
2534    ///     (&Natural::from(23u32))
2535    ///         .div_rem(&Natural::from(10u32))
2536    ///         .to_debug_string(),
2537    ///     "(2, 3)"
2538    /// );
2539    ///
2540    /// // 810000006723 * 1234567890987 + 530068894399 = 1000000000000000000000000
2541    /// assert_eq!(
2542    ///     (&Natural::from_str("1000000000000000000000000").unwrap())
2543    ///         .div_rem(&Natural::from_str("1234567890987").unwrap())
2544    ///         .to_debug_string(),
2545    ///     "(810000006723, 530068894399)"
2546    /// );
2547    /// ```
2548    #[inline]
2549    fn div_rem(self, other: &Natural) -> (Natural, Natural) {
2550        self.div_mod(other)
2551    }
2552}
2553
2554impl DivAssignRem<Self> for Natural {
2555    type RemOutput = Self;
2556
2557    /// Divides a [`Natural`] by another [`Natural`] in place, taking the [`Natural`] on the
2558    /// right-hand side by value and returning the remainder. The quotient is rounded towards zero.
2559    ///
2560    /// The quotient and remainder satisfy $x = qy + r$ and $0 \leq r < y$.
2561    ///
2562    /// $$
2563    /// f(x, y) = x - y\left \lfloor \frac{x}{y} \right \rfloor,
2564    /// $$
2565    /// $$
2566    /// x \gets \left \lfloor \frac{x}{y} \right \rfloor.
2567    /// $$
2568    ///
2569    /// For [`Natural`]s, `div_assign_rem` is equivalent to
2570    /// [`div_assign_mod`](malachite_base::num::arithmetic::traits::DivAssignMod::div_assign_mod).
2571    ///
2572    /// # Worst-case complexity
2573    /// $T(n) = O(n \log n \log\log n)$
2574    ///
2575    /// $M(n) = O(n \log n)$
2576    ///
2577    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2578    ///
2579    /// # Panics
2580    /// Panics if `other` is zero.
2581    ///
2582    /// # Examples
2583    /// ```
2584    /// use core::str::FromStr;
2585    /// use malachite_base::num::arithmetic::traits::DivAssignRem;
2586    /// use malachite_nz::natural::Natural;
2587    ///
2588    /// // 2 * 10 + 3 = 23
2589    /// let mut x = Natural::from(23u32);
2590    /// assert_eq!(x.div_assign_rem(Natural::from(10u32)), 3);
2591    /// assert_eq!(x, 2);
2592    ///
2593    /// // 810000006723 * 1234567890987 + 530068894399 = 1000000000000000000000000
2594    /// let mut x = Natural::from_str("1000000000000000000000000").unwrap();
2595    /// assert_eq!(
2596    ///     x.div_assign_rem(Natural::from_str("1234567890987").unwrap()),
2597    ///     530068894399u64
2598    /// );
2599    /// assert_eq!(x, 810000006723u64);
2600    /// ```
2601    #[inline]
2602    fn div_assign_rem(&mut self, other: Self) -> Self {
2603        self.div_assign_mod(other)
2604    }
2605}
2606
2607impl<'a> DivAssignRem<&'a Self> for Natural {
2608    type RemOutput = Self;
2609
2610    /// Divides a [`Natural`] by another [`Natural`] in place, taking the [`Natural`] on the
2611    /// right-hand side by reference and returning the remainder. The quotient is rounded towards
2612    /// zero.
2613    ///
2614    /// The quotient and remainder satisfy $x = qy + r$ and $0 \leq r < y$.
2615    ///
2616    /// $$
2617    /// f(x, y) = x - y\left \lfloor \frac{x}{y} \right \rfloor,
2618    /// $$
2619    /// $$
2620    /// x \gets \left \lfloor \frac{x}{y} \right \rfloor.
2621    /// $$
2622    ///
2623    /// For [`Natural`]s, `div_assign_rem` is equivalent to
2624    /// [`div_assign_mod`](malachite_base::num::arithmetic::traits::DivAssignMod::div_assign_mod).
2625    ///
2626    /// # Worst-case complexity
2627    /// $T(n) = O(n \log n \log\log n)$
2628    ///
2629    /// $M(n) = O(n \log n)$
2630    ///
2631    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2632    ///
2633    /// # Panics
2634    /// Panics if `other` is zero.
2635    ///
2636    /// # Examples
2637    /// ```
2638    /// use core::str::FromStr;
2639    /// use malachite_base::num::arithmetic::traits::DivAssignRem;
2640    /// use malachite_nz::natural::Natural;
2641    ///
2642    /// // 2 * 10 + 3 = 23
2643    /// let mut x = Natural::from(23u32);
2644    /// assert_eq!(x.div_assign_rem(&Natural::from(10u32)), 3);
2645    /// assert_eq!(x, 2);
2646    ///
2647    /// // 810000006723 * 1234567890987 + 530068894399 = 1000000000000000000000000
2648    /// let mut x = Natural::from_str("1000000000000000000000000").unwrap();
2649    /// assert_eq!(
2650    ///     x.div_assign_rem(&Natural::from_str("1234567890987").unwrap()),
2651    ///     530068894399u64
2652    /// );
2653    /// assert_eq!(x, 810000006723u64);
2654    /// ```
2655    #[inline]
2656    fn div_assign_rem(&mut self, other: &'a Self) -> Self {
2657        self.div_assign_mod(other)
2658    }
2659}
2660
2661impl CeilingDivNegMod<Self> for Natural {
2662    type DivOutput = Self;
2663    type ModOutput = Self;
2664
2665    /// Divides a [`Natural`] by another [`Natural`], taking both by value and returning the ceiling
2666    /// of the quotient and the remainder of the negative of the first [`Natural`] divided by the
2667    /// second.
2668    ///
2669    /// The quotient and remainder satisfy $x = qy - r$ and $0 \leq r < y$.
2670    ///
2671    /// $$
2672    /// f(x, y) = \left ( \left \lceil \frac{x}{y} \right \rceil, \space
2673    /// y\left \lceil \frac{x}{y} \right \rceil - x \right ).
2674    /// $$
2675    ///
2676    /// # Worst-case complexity
2677    /// $T(n) = O(n \log n \log\log n)$
2678    ///
2679    /// $M(n) = O(n \log n)$
2680    ///
2681    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2682    ///
2683    /// # Panics
2684    /// Panics if `other` is zero.
2685    ///
2686    /// # Examples
2687    /// ```
2688    /// use core::str::FromStr;
2689    /// use malachite_base::num::arithmetic::traits::CeilingDivNegMod;
2690    /// use malachite_base::strings::ToDebugString;
2691    /// use malachite_nz::natural::Natural;
2692    ///
2693    /// // 3 * 10 - 7 = 23
2694    /// assert_eq!(
2695    ///     Natural::from(23u32)
2696    ///         .ceiling_div_neg_mod(Natural::from(10u32))
2697    ///         .to_debug_string(),
2698    ///     "(3, 7)"
2699    /// );
2700    ///
2701    /// // 810000006724 * 1234567890987 - 704498996588 = 1000000000000000000000000
2702    /// assert_eq!(
2703    ///     Natural::from_str("1000000000000000000000000")
2704    ///         .unwrap()
2705    ///         .ceiling_div_neg_mod(Natural::from_str("1234567890987").unwrap())
2706    ///         .to_debug_string(),
2707    ///     "(810000006724, 704498996588)"
2708    /// );
2709    /// ```
2710    #[inline]
2711    fn ceiling_div_neg_mod(mut self, other: Self) -> (Self, Self) {
2712        let r = self.ceiling_div_assign_neg_mod(other);
2713        (self, r)
2714    }
2715}
2716
2717impl<'a> CeilingDivNegMod<&'a Self> for Natural {
2718    type DivOutput = Self;
2719    type ModOutput = Self;
2720
2721    /// Divides a [`Natural`] by another [`Natural`], taking the first by value and the second by
2722    /// reference and returning the ceiling of the quotient and the remainder of the negative of the
2723    /// first [`Natural`] divided by the second.
2724    ///
2725    /// The quotient and remainder satisfy $x = qy - r$ and $0 \leq r < y$.
2726    ///
2727    /// $$
2728    /// f(x, y) = \left ( \left \lceil \frac{x}{y} \right \rceil, \space
2729    /// y\left \lceil \frac{x}{y} \right \rceil - x \right ).
2730    /// $$
2731    ///
2732    /// # Worst-case complexity
2733    /// $T(n) = O(n \log n \log\log n)$
2734    ///
2735    /// $M(n) = O(n \log n)$
2736    ///
2737    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2738    ///
2739    /// # Panics
2740    /// Panics if `other` is zero.
2741    ///
2742    /// # Examples
2743    /// ```
2744    /// use core::str::FromStr;
2745    /// use malachite_base::num::arithmetic::traits::CeilingDivNegMod;
2746    /// use malachite_base::strings::ToDebugString;
2747    /// use malachite_nz::natural::Natural;
2748    ///
2749    /// // 3 * 10 - 7 = 23
2750    /// assert_eq!(
2751    ///     Natural::from(23u32)
2752    ///         .ceiling_div_neg_mod(&Natural::from(10u32))
2753    ///         .to_debug_string(),
2754    ///     "(3, 7)"
2755    /// );
2756    ///
2757    /// // 810000006724 * 1234567890987 - 704498996588 = 1000000000000000000000000
2758    /// assert_eq!(
2759    ///     Natural::from_str("1000000000000000000000000")
2760    ///         .unwrap()
2761    ///         .ceiling_div_neg_mod(&Natural::from_str("1234567890987").unwrap())
2762    ///         .to_debug_string(),
2763    ///     "(810000006724, 704498996588)"
2764    /// );
2765    /// ```
2766    #[inline]
2767    fn ceiling_div_neg_mod(mut self, other: &'a Self) -> (Self, Self) {
2768        let r = self.ceiling_div_assign_neg_mod(other);
2769        (self, r)
2770    }
2771}
2772
2773impl CeilingDivNegMod<Natural> for &Natural {
2774    type DivOutput = Natural;
2775    type ModOutput = Natural;
2776
2777    /// Divides a [`Natural`] by another [`Natural`], taking the first by reference and the second
2778    /// by value and returning the ceiling of the quotient and the remainder of the negative of the
2779    /// first [`Natural`] divided by the second.
2780    ///
2781    /// The quotient and remainder satisfy $x = qy - r$ and $0 \leq r < y$.
2782    ///
2783    /// $$
2784    /// f(x, y) = \left ( \left \lceil \frac{x}{y} \right \rceil, \space
2785    /// y\left \lceil \frac{x}{y} \right \rceil - x \right ).
2786    /// $$
2787    ///
2788    /// # Worst-case complexity
2789    /// $T(n) = O(n \log n \log\log n)$
2790    ///
2791    /// $M(n) = O(n \log n)$
2792    ///
2793    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2794    ///
2795    /// # Panics
2796    /// Panics if `other` is zero.
2797    ///
2798    /// # Examples
2799    /// ```
2800    /// use core::str::FromStr;
2801    /// use malachite_base::num::arithmetic::traits::CeilingDivNegMod;
2802    /// use malachite_base::strings::ToDebugString;
2803    /// use malachite_nz::natural::Natural;
2804    ///
2805    /// // 3 * 10 - 7 = 23
2806    /// assert_eq!(
2807    ///     (&Natural::from(23u32))
2808    ///         .ceiling_div_neg_mod(Natural::from(10u32))
2809    ///         .to_debug_string(),
2810    ///     "(3, 7)"
2811    /// );
2812    ///
2813    /// // 810000006724 * 1234567890987 - 704498996588 = 1000000000000000000000000
2814    /// assert_eq!(
2815    ///     (&Natural::from_str("1000000000000000000000000").unwrap())
2816    ///         .ceiling_div_neg_mod(Natural::from_str("1234567890987").unwrap())
2817    ///         .to_debug_string(),
2818    ///     "(810000006724, 704498996588)"
2819    /// );
2820    /// ```
2821    fn ceiling_div_neg_mod(self, other: Natural) -> (Natural, Natural) {
2822        let (q, r) = self.div_mod(&other);
2823        if r == 0 {
2824            (q, r)
2825        } else {
2826            (q.add_limb(1), other - r)
2827        }
2828    }
2829}
2830
2831impl CeilingDivNegMod<&Natural> for &Natural {
2832    type DivOutput = Natural;
2833    type ModOutput = Natural;
2834
2835    /// Divides a [`Natural`] by another [`Natural`], taking both by reference and returning the
2836    /// ceiling of the quotient and the remainder of the negative of the first [`Natural`] divided
2837    /// by the second.
2838    ///
2839    /// The quotient and remainder satisfy $x = qy - r$ and $0 \leq r < y$.
2840    ///
2841    /// $$
2842    /// f(x, y) = \left ( \left \lceil \frac{x}{y} \right \rceil, \space
2843    /// y\left \lceil \frac{x}{y} \right \rceil - x \right ).
2844    /// $$
2845    ///
2846    /// # Worst-case complexity
2847    /// $T(n) = O(n \log n \log\log n)$
2848    ///
2849    /// $M(n) = O(n \log n)$
2850    ///
2851    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2852    ///
2853    /// # Panics
2854    /// Panics if `other` is zero.
2855    ///
2856    /// # Examples
2857    /// ```
2858    /// use core::str::FromStr;
2859    /// use malachite_base::num::arithmetic::traits::CeilingDivNegMod;
2860    /// use malachite_base::strings::ToDebugString;
2861    /// use malachite_nz::natural::Natural;
2862    ///
2863    /// // 3 * 10 - 7 = 23
2864    /// assert_eq!(
2865    ///     (&Natural::from(23u32))
2866    ///         .ceiling_div_neg_mod(&Natural::from(10u32))
2867    ///         .to_debug_string(),
2868    ///     "(3, 7)"
2869    /// );
2870    ///
2871    /// // 810000006724 * 1234567890987 - 704498996588 = 1000000000000000000000000
2872    /// assert_eq!(
2873    ///     (&Natural::from_str("1000000000000000000000000").unwrap())
2874    ///         .ceiling_div_neg_mod(&Natural::from_str("1234567890987").unwrap())
2875    ///         .to_debug_string(),
2876    ///     "(810000006724, 704498996588)"
2877    /// );
2878    /// ```
2879    fn ceiling_div_neg_mod(self, other: &Natural) -> (Natural, Natural) {
2880        let (q, r) = self.div_mod(other);
2881        if r == 0 {
2882            (q, r)
2883        } else {
2884            (q.add_limb(1), other - r)
2885        }
2886    }
2887}
2888
2889impl CeilingDivAssignNegMod<Self> for Natural {
2890    type ModOutput = Self;
2891
2892    /// Divides a [`Natural`] by another [`Natural`] in place, taking the [`Natural`] on the
2893    /// right-hand side by value and returning the remainder of the negative of the first number
2894    /// divided by the second.
2895    ///
2896    /// The quotient and remainder satisfy $x = qy - r$ and $0 \leq r < y$.
2897    ///
2898    /// $$
2899    /// f(x, y) = y\left \lceil \frac{x}{y} \right \rceil - x,
2900    /// $$
2901    /// $$
2902    /// x \gets \left \lceil \frac{x}{y} \right \rceil.
2903    /// $$
2904    ///
2905    /// # Worst-case complexity
2906    /// $T(n) = O(n \log n \log\log n)$
2907    ///
2908    /// $M(n) = O(n \log n)$
2909    ///
2910    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2911    ///
2912    /// # Panics
2913    /// Panics if `other` is zero.
2914    ///
2915    /// # Examples
2916    /// ```
2917    /// use core::str::FromStr;
2918    /// use malachite_base::num::arithmetic::traits::CeilingDivAssignNegMod;
2919    /// use malachite_nz::natural::Natural;
2920    ///
2921    /// // 3 * 10 - 7 = 23
2922    /// let mut x = Natural::from(23u32);
2923    /// assert_eq!(x.ceiling_div_assign_neg_mod(Natural::from(10u32)), 7);
2924    /// assert_eq!(x, 3);
2925    ///
2926    /// // 810000006724 * 1234567890987 - 704498996588 = 1000000000000000000000000
2927    /// let mut x = Natural::from_str("1000000000000000000000000").unwrap();
2928    /// assert_eq!(
2929    ///     x.ceiling_div_assign_neg_mod(Natural::from_str("1234567890987").unwrap()),
2930    ///     704498996588u64,
2931    /// );
2932    /// assert_eq!(x, 810000006724u64);
2933    /// ```
2934    fn ceiling_div_assign_neg_mod(&mut self, other: Self) -> Self {
2935        let r = self.div_assign_mod(&other);
2936        if r == 0 {
2937            Self::ZERO
2938        } else {
2939            *self += Self::ONE;
2940            other - r
2941        }
2942    }
2943}
2944
2945impl<'a> CeilingDivAssignNegMod<&'a Self> for Natural {
2946    type ModOutput = Self;
2947
2948    /// Divides a [`Natural`] by another [`Natural`] in place, taking the [`Natural`] on the
2949    /// right-hand side by reference and returning the remainder of the negative of the first number
2950    /// divided by the second.
2951    ///
2952    /// The quotient and remainder satisfy $x = qy - r$ and $0 \leq r < y$.
2953    ///
2954    /// $$
2955    /// f(x, y) = y\left \lceil \frac{x}{y} \right \rceil - x,
2956    /// $$
2957    /// $$
2958    /// x \gets \left \lceil \frac{x}{y} \right \rceil.
2959    /// $$
2960    ///
2961    /// # Worst-case complexity
2962    /// $T(n) = O(n \log n \log\log n)$
2963    ///
2964    /// $M(n) = O(n \log n)$
2965    ///
2966    /// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
2967    ///
2968    /// # Panics
2969    /// Panics if `other` is zero.
2970    ///
2971    /// # Examples
2972    /// ```
2973    /// use core::str::FromStr;
2974    /// use malachite_base::num::arithmetic::traits::CeilingDivAssignNegMod;
2975    /// use malachite_nz::natural::Natural;
2976    ///
2977    /// // 3 * 10 - 7 = 23
2978    /// let mut x = Natural::from(23u32);
2979    /// assert_eq!(x.ceiling_div_assign_neg_mod(&Natural::from(10u32)), 7);
2980    /// assert_eq!(x, 3);
2981    ///
2982    /// // 810000006724 * 1234567890987 - 704498996588 = 1000000000000000000000000
2983    /// let mut x = Natural::from_str("1000000000000000000000000").unwrap();
2984    /// assert_eq!(
2985    ///     x.ceiling_div_assign_neg_mod(&Natural::from_str("1234567890987").unwrap()),
2986    ///     704498996588u64,
2987    /// );
2988    /// assert_eq!(x, 810000006724u64);
2989    /// ```
2990    fn ceiling_div_assign_neg_mod(&mut self, other: &'a Self) -> Self {
2991        let r = self.div_assign_mod(other);
2992        if r == 0 {
2993            Self::ZERO
2994        } else {
2995            *self += Self::ONE;
2996            other - r
2997        }
2998    }
2999}