div-pow10 0.1.1

Fast division by powers of 10.
Documentation
/// Calculate division: `n / 10.pow(i)`.
///
/// The dividend is 1-word, 64-bit, same with divisor.
///
/// Return None if: `i > 19`.
pub fn div_single(n: u64, i: u32) -> Option<u64> {
    if i > 19 {
        None
    } else if i == 0 {
        Some(n)
    } else {
        Some(unsafe { unchecked_div_single(n, i) })
    }
}

/// Calculate division: `n / 10.pow(i)`.
///
/// The dividend is 1-word, 64-bit, same with divisor.
///
/// # Safety:
///
/// It's UB if: `i == 0` or `i > 19`.
pub unsafe fn unchecked_div_single(n: u64, i: u32) -> u64 {
    debug_assert!(i > 0);
    unsafe { do_unchecked_div_single(n, i, false) }
}

/// Calculate division: `n / 10.pow(i)`.
///
/// Compared to [`unchecked_div_single`], this divident is 63-bit,
/// which is 1 bit less. This is slightly faster.
/// Besides, this works with `i = 0`, but not works with `i == 19`.
///
/// # Saftey:
///
/// It's UB if `i > 18` or `n > 2.pow(63)`.
pub unsafe fn unchecked_div_single_r1b(n: u64, i: u32) -> u64 {
    debug_assert!(n <= 2_u64.pow(63));
    debug_assert!(i < 19);
    unsafe { do_unchecked_div_single(n, i, true) }
}

unsafe fn do_unchecked_div_single(n: u64, i: u32, is_r1b: bool) -> u64 {
    const GM_EXP_MAGICS: [(u64, u32, u32); 20] = [
        (0, 0, u32::MAX),
        (0x999999999999999a, 4, 3),
        (0x47ae147ae147ae15, 7, 6),
        (0x0624dd2f1a9fbe77, 10, 9),
        (0xa36e2eb1c432ca58, 14, 13),
        (0x4f8b588e368f0847, 17, 16),
        (0x0c6f7a0b5ed8d36c, 20, 19),
        (0xad7f29abcaf48579, 24, 23),
        (0x5798ee2308c39dfa, 27, 26),
        (0x12e0be826d694b2f, 30, 29),
        (0xb7cdfd9d7bdbab7e, 34, 33),
        (0x5fd7fe17964955fe, 37, 36),
        (0x19799812dea11198, 40, 39),
        (0xc25c268497681c27, 44, 43),
        (0x6849b86a12b9b01f, 47, 46),
        (0x203af9ee756159b3, 50, 49),
        (0xcd2b297d889bc2b7, 54, 53),
        (0x70ef54646d496893, 57, 56),
        (0x2725dd1d243aba0f, 60, 59),
        (0xd83c94fb6d2ac34b, u32::MAX, 63),
    ];

    debug_assert!(i < 20);
    let magic = unsafe { GM_EXP_MAGICS.get_unchecked(i as usize) };

    // (n + ((n * m) >> 64)) >> l
    let high = ((n as u128 * magic.0 as u128) >> 64) as u64;

    if is_r1b {
        // no overflow
        (high + n) >> magic.1
    } else {
        // n + high may overflow, so
        // note: magic.1 = l - 1
        (high + ((n - high) >> 1)) >> magic.2
    }
}

/// Calculate division: `n / 10.pow(i)`, return the quotient and remainder.
///
/// The dividend is 2-word, 128-bit, double of divisor.
///
/// Return `None` if: `i > 19` or `n>>64 >= 10.pow(i)`.
pub fn div_double(n: u128, i: u32) -> Option<(u64, u64)> {
    let Some(exp) = POWERS.get(i as usize) else {
        return None;
    };
    if (n >> 64) as u64 >= *exp {
        return None;
    }
    Some(unsafe { unchecked_div_double(n, i) })
}

/// Calculate division: `n / 10.pow(i)`, return the quotient and remainder.
///
/// The dividend is 2-word, 128-bit, double of divisor.
///
/// # Safety:
///
/// It's UB if: `i > 19` or `n>>64 >= 10.pow(i)`.
pub unsafe fn unchecked_div_double(n: u128, i: u32) -> (u64, u64) {
    debug_assert!((i as usize) < POWERS.len());
    let exp = unsafe { *POWERS.get_unchecked(i as usize) };

    // check overflow
    debug_assert!(((n >> 64) as u64) < exp);

    // The magics are generated by python code:
    //
    // def gen(d):
    //     zeros = 64 - d.bit_length()
    //     magic = pow(2, 128) // (d << zeros)
    //     magic = magic - pow(2, 64) # make magic fit in 128-bit
    //     return (magic, zeros)
    const MG_EXP_MAGICS: [(u64, u32); 20] = [
        (0, 64),
        (0x9999999999999999, 60),
        (0x47ae147ae147ae14, 57),
        (0x0624dd2f1a9fbe76, 54),
        (0xa36e2eb1c432ca57, 50),
        (0x4f8b588e368f0846, 47),
        (0x0c6f7a0b5ed8d36b, 44),
        (0xad7f29abcaf48578, 40),
        (0x5798ee2308c39df9, 37),
        (0x12e0be826d694b2e, 34),
        (0xb7cdfd9d7bdbab7d, 30),
        (0x5fd7fe17964955fd, 27),
        (0x19799812dea11197, 24),
        (0xc25c268497681c26, 20),
        (0x6849b86a12b9b01e, 17),
        (0x203af9ee756159b2, 14),
        (0xcd2b297d889bc2b6, 10),
        (0x70ef54646d496892, 7),
        (0x2725dd1d243aba0e, 4),
        (0xd83c94fb6d2ac34a, 0),
    ];

    // algorithm:
    //   zn = n << zeros
    //   q = (((magic * zn) >> 64) + zn) >> 64

    // SAFETY: exp has been read by i already
    let &(magic, zeros) = unsafe { MG_EXP_MAGICS.get_unchecked(i as usize) };

    // calc: (high, low) := n << zeros
    let zn = n << zeros;
    let high = (zn >> 64) as u64;
    let low = zn as u64;

    // calc: mul := (magic * zn) >> 64
    let mul_low = (low as u128 * magic as u128) >> 64;
    let mul = (high as u128 * magic as u128) + mul_low;

    // calc: final q
    let q = (mul + zn) >> 64;

    // correction by remainder
    let r = (n - q * exp as u128) as u64;
    if r < exp {
        (q as u64, r)
    } else {
        (q as u64 + 1, r - exp)
    }
}

const POWERS: [u64; 20] = [
    1,
    10_u64.pow(1),
    10_u64.pow(2),
    10_u64.pow(3),
    10_u64.pow(4),
    10_u64.pow(5),
    10_u64.pow(6),
    10_u64.pow(7),
    10_u64.pow(8),
    10_u64.pow(9),
    10_u64.pow(10),
    10_u64.pow(11),
    10_u64.pow(12),
    10_u64.pow(13),
    10_u64.pow(14),
    10_u64.pow(15),
    10_u64.pow(16),
    10_u64.pow(17),
    10_u64.pow(18),
    10_u64.pow(19),
];

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_single() {
        let n = 123_u64;
        assert_eq!(div_single(n, 20), None);
        assert_eq!(div_single(n, 0), Some(n));

        const COUNT: u64 = 100000;
        const STEP: u64 = u64::MAX / COUNT as u64;
        for i in 1..20 {
            let pow = POWERS[i as usize];
            for j in 0..COUNT {
                let n = j * STEP;
                assert_eq!(div_single(n, i), Some(n / pow));

                if i < 19 && n <= 2_u64.pow(63) {
                    assert_eq!(unsafe { unchecked_div_single_r1b(n, i) }, n / pow);
                }
            }
        }
    }

    #[test]
    fn test_double() {
        let n = 123_u128;
        assert_eq!(div_double(n, 20), None);
        assert_eq!(div_double(n, 0), Some((n as u64, 0)));

        const COUNT: u64 = 1000; // enlarge this for more test
        const K_STEP: u64 = u64::MAX / COUNT;
        for i in 1..20 {
            let pow = POWERS[i as usize];
            let count = COUNT.min(pow);
            let step = pow / count;
            for j in 0..count {
                let high = j * step;

                for k in 0..COUNT {
                    let low = k * K_STEP;

                    let n = ((high as u128) << 64) + low as u128;

                    let (q, r) = div_double(n, i).unwrap();

                    let p = q as u128 * pow as u128 + r as u128;

                    assert_eq!(p, n);
                }
            }
        }
    }
}