use crate::{
arch::word::{DoubleWord, Word},
helper_macros::debug_assert_zero,
math::{shl_dword, shr_word, FastDivideNormalized, FastDivideNormalized2},
memory::{self, Memory},
primitive::{
double_word, extend_word, highest_dword, lowest_dword, split_dword, split_hi_word,
WORD_BITS,
},
shift,
};
use alloc::alloc::Layout;
mod divide_conquer;
mod simple;
pub(crate) use simple::div_rem_highest_word;
const THRESHOLD_SIMPLE: usize = 32;
#[inline]
pub(crate) fn normalize(words: &mut [Word]) -> (u32, FastDivideNormalized2) {
let shift = words.last().unwrap().leading_zeros();
debug_assert_zero!(shift::shl_in_place(words, shift));
let top_words = highest_dword(words);
(shift, FastDivideNormalized2::new(top_words))
}
#[must_use]
pub fn div_by_word_in_place(words: &mut [Word], rhs: Word) -> Word {
debug_assert!(rhs != 0 && !words.is_empty());
if rhs == 1 {
return 0;
} else if rhs.is_power_of_two() {
let shift = rhs.trailing_zeros();
let rem = shift::shr_in_place(words, shift);
return rem >> (WORD_BITS - shift);
}
let shift = rhs.leading_zeros();
let fast_div_rhs = FastDivideNormalized::new(rhs << shift);
fast_div_by_word_in_place(words, shift, fast_div_rhs)
}
#[must_use]
pub(crate) fn fast_div_by_word_in_place(
words: &mut [Word],
shift: u32,
fast_div_rhs: FastDivideNormalized,
) -> Word {
let mut rem = shift::shl_in_place(words, shift);
for word in words.iter_mut().rev() {
let a = double_word(*word, rem);
let (q, r) = fast_div_rhs.div_rem_2by1(a);
*word = q;
rem = r;
}
rem >> shift
}
pub const fn rem_by_word(words: &[Word], rhs: Word) -> Word {
debug_assert!(rhs != 0 && !words.is_empty());
if rhs.is_power_of_two() {
return words[0] & (rhs - 1);
}
let shift = rhs.leading_zeros();
let fast_div_rhs = FastDivideNormalized::new(rhs << shift);
let rem = fast_rem_by_normalized_word(words, fast_div_rhs);
let a = extend_word(rem) << shift;
let (_, rem) = fast_div_rhs.div_rem_2by1(a);
rem >> shift
}
pub(crate) const fn fast_rem_by_normalized_word(
words: &[Word],
fast_div_rhs: FastDivideNormalized,
) -> Word {
debug_assert!(!words.is_empty());
let (last, words_lo) = split_hi_word(words);
let mut rem = fast_div_rhs.div_rem_1by1(last).1;
let mut i = words_lo.len();
while i > 0 {
i -= 1;
let a = double_word(words_lo[i], rem);
rem = fast_div_rhs.div_rem_2by1(a).1;
}
rem
}
pub fn div_by_dword_in_place(words: &mut [Word], rhs: DoubleWord) -> DoubleWord {
debug_assert!(rhs > Word::MAX as DoubleWord, "call div_by_word_in_place when rhs is small");
debug_assert!(words.len() >= 2);
if rhs.is_power_of_two() {
let first = shift::shr_in_place_one_word(words);
let shift = rhs.trailing_zeros() - WORD_BITS;
if shift == 0 {
return extend_word(first);
} else {
let n2 = shift::shr_in_place(words, shift);
let (n1, n0) = shr_word(first, shift);
return double_word(n0, n1 | n2) >> (WORD_BITS - shift);
}
}
let shift = rhs.leading_zeros();
debug_assert!(shift < WORD_BITS); let fast_div_rhs = FastDivideNormalized2::new(rhs << shift);
fast_div_by_dword_in_place(words, shift, fast_div_rhs)
}
#[must_use]
pub(crate) fn fast_div_by_dword_in_place(
words: &mut [Word],
shift: u32,
fast_div_rhs: FastDivideNormalized2,
) -> DoubleWord {
debug_assert!(words.len() >= 2 && shift < WORD_BITS);
let hi = shift::shl_in_place(words, shift);
let (top_hi, words_lo) = words.split_last_mut().unwrap();
let (top_lo, words_lo) = words_lo.split_last_mut().unwrap();
let (q, mut rem) = fast_div_rhs.div_rem_3by2(*top_lo, double_word(*top_hi, hi));
*top_hi = 0;
*top_lo = q;
let mut dwords = words_lo.rchunks_exact_mut(2);
for chunk in &mut dwords {
let dword = lowest_dword(chunk);
let (q, new_rem) = fast_div_rhs.div_rem_4by2(dword, rem);
let (new_lo, new_hi) = split_dword(q);
*chunk.first_mut().unwrap() = new_lo;
*chunk.last_mut().unwrap() = new_hi;
rem = new_rem;
}
let r = dwords.into_remainder();
if !r.is_empty() {
debug_assert!(r.len() == 1);
let r0 = r.first_mut().unwrap();
let (q, new_rem) = fast_div_rhs.div_rem_3by2(*r0, rem);
*r0 = q;
rem = new_rem;
}
rem >> shift
}
pub const fn rem_by_dword(words: &[Word], rhs: DoubleWord) -> DoubleWord {
debug_assert!(rhs > Word::MAX as DoubleWord, "call div_by_word_in_place when rhs is small");
debug_assert!(words.len() >= 2);
if rhs.is_power_of_two() {
return double_word(words[0], words[1]) & (rhs - 1);
}
let shift = rhs.leading_zeros();
debug_assert!(shift < WORD_BITS);
let fast_div_rhs = FastDivideNormalized2::new(rhs << shift);
let rem = fast_rem_by_normalized_dword(words, fast_div_rhs);
let (a0, a1, a2) = shl_dword(rem, shift);
let (_, rem) = fast_div_rhs.div_rem_3by2(a0, double_word(a1, a2));
rem >> shift
}
pub(crate) const fn fast_rem_by_normalized_dword(
words: &[Word],
fast_div_rhs: FastDivideNormalized2,
) -> DoubleWord {
debug_assert!(words.len() >= 2);
let mut i = words.len() - 1;
let top_dword = double_word(words[i - 1], words[i]);
let mut rem = fast_div_rhs.div_rem_2by2(top_dword).1;
while i > 2 {
i -= 2;
let top_dword = double_word(words[i - 1], words[i]);
rem = fast_div_rhs.div_rem_4by2(top_dword, rem).1;
}
if i == 2 {
rem = fast_div_rhs.div_rem_3by2(words[0], rem).1
}
rem
}
pub fn memory_requirement_exact(lhs_len: usize, rhs_len: usize) -> Layout {
assert!(lhs_len >= rhs_len && rhs_len >= 2);
if rhs_len <= THRESHOLD_SIMPLE || lhs_len - rhs_len <= THRESHOLD_SIMPLE {
memory::zero_layout()
} else {
divide_conquer::memory_requirement_exact(lhs_len, rhs_len)
}
}
#[must_use]
pub(crate) fn div_rem_in_place(
lhs: &mut [Word],
rhs: &[Word],
fast_div_rhs_top: FastDivideNormalized2,
memory: &mut Memory,
) -> bool {
debug_assert!(lhs.len() >= rhs.len() && rhs.len() >= 2);
if rhs.len() <= THRESHOLD_SIMPLE || lhs.len() - rhs.len() <= THRESHOLD_SIMPLE {
simple::div_rem_in_place(lhs, rhs, fast_div_rhs_top)
} else {
divide_conquer::div_rem_in_place(lhs, rhs, fast_div_rhs_top, memory)
}
}
pub(crate) fn div_rem_unshifted_in_place(
lhs: &mut [Word],
rhs: &[Word],
shift: u32,
fast_div_rhs_top: FastDivideNormalized2,
memory: &mut Memory,
) -> Word {
let lhs_carry = shift::shl_in_place(lhs, shift);
let mut q_top = if lhs_carry > 0 {
div_rem_highest_word(lhs_carry, lhs, rhs, fast_div_rhs_top)
} else {
0
};
let overflow = div_rem_in_place(lhs, rhs, fast_div_rhs_top, memory);
q_top += overflow as Word;
q_top
}