// ffnt 0.11.0 - Docs.rs

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use std::{
    fmt::{self, Display},
    iter::{Product, Sum},
    num::{IntErrorKind, TryFromIntError},
    ops::{
        Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign,
    },
    str::FromStr,
};

use crate::{ParseIntError, Z32};

/// Element of a finite field with a 64 bit characteristic `P`
///
/// The wrapped `u64` is the representative of the element. The
/// constructors in this file store a value below `P`;
/// `new_unchecked` only verifies this with a debug assertion.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[repr(transparent)]
pub struct Z64<const P: u64>(u64);

impl<const P: u64> Z64<P> {
    /// Data precomputed from `P` for modular reduction and
    /// multiplication
    ///
    /// `Z64Info::new` asserts `P > 1` and that `P` needs at most
    /// `SP_NBITS` bits, so evaluating this constant rejects every
    /// other modulus.
    const INFO: Z64Info = Z64Info::new(P);

    /// Minimum field element, i.e. 0
    pub const MIN: Z64<P> = {
        assert!(P > 0);
        Self::new_unchecked(0)
    };
    /// Maximum field element, i.e. `P - 1`
    pub const MAX: Z64<P> = {
        assert!(P > 1);
        Self::new_unchecked(P - 1)
    };

    /// Create a new field element corresponding to some integer
    ///
    /// The integer is reduced modulo the field characteristic `P`
    pub const fn new(z: i64) -> Self {
        let res = remi(z, P, Self::info().red_struct);
        debug_assert!(res >= 0);
        let res = res as u64;
        Self::new_unchecked(res)
    }

    /// Create a new field element corresponding to some integer
    /// without modular reduction
    ///
    /// # Safety
    ///
    /// The argument should be less than `P`. This is only verified
    /// by a debug assertion; the function itself is not `unsafe`.
    ///
    /// # Panics
    ///
    /// Panics if `P` is zero.
    pub const fn new_unchecked(z: u64) -> Self {
        assert!(P > 0);
        debug_assert!(z < P);
        Self(z)
    }

    /// The multiplicative inverse `1/z` of a field element `z`.
    ///
    /// # Panics
    ///
    /// Panics if `z` is not invertible. If the characteristic `P` is
    /// a prime power this happens only if `z` is zero.
    pub const fn inv(&self) -> Self {
        self.try_inv()
            .expect("Number has no multiplicative inverse")
    }

    /// The multiplicative inverse `1/z` of a field element `z` or
    /// `None` if the inverse does not exist
    pub const fn try_inv(&self) -> Option<Self> {
        let res = extended_gcd(self.0, Self::modulus());
        // an inverse exists exactly when `z` and `P` are coprime
        if res.gcd != 1 {
            return None;
        }
        // the Bézout coefficient of `z` is the inverse, possibly
        // shifted by one multiple of `P` into the negative range
        let s = res.bezout[0];
        let inv = if s < 0 {
            debug_assert!(s + Self::modulus() as i64 >= 0);
            s + Self::modulus() as i64
        } else {
            s
        } as u64;
        let inv = Self::new_unchecked(inv);
        Some(inv)
    }

    /// Check if a field element `z` has a multiplicative inverse `1/z`
    ///
    /// If you know that the characteristic is a prime it is usually
    /// better to check if `z` is zero.
    pub const fn has_inv(&self) -> bool {
        gcd(self.0, Self::modulus()) == 1
    }

    /// Reference to the precomputed data for this modulus
    const fn info() -> &'static Z64Info {
        &Self::INFO
    }

    /// The field characteristic `P`
    pub const fn modulus() -> u64 {
        P
    }

    /// The precomputed `SpInverse64` for the modulus `P`, as passed
    /// to `mul_mod` by the multiplication operator
    #[allow(missing_docs)]
    pub const fn modulus_inv() -> SpInverse64 {
        Self::info().p_inv
    }

    /// `z` to some integer power `exp`
    ///
    /// For a negative exponent the element is raised to `|exp|` and
    /// the result is inverted.
    ///
    /// # Panics
    ///
    /// Panics if `exp` is negative and the power has no
    /// multiplicative inverse.
    pub fn powi(self, exp: i64) -> Self {
        if exp < 0 {
            // `unsigned_abs` is also defined for `i64::MIN`, where
            // plain negation would overflow
            self.powu(exp.unsigned_abs()).inv()
        } else {
            self.powu(exp as u64)
        }
    }

    /// `z` to some integer power `exp`
    ///
    /// Uses binary exponentiation (square-and-multiply).
    pub fn powu(mut self, mut exp: u64) -> Self {
        assert!(P > 1);
        let mut res = Self::new_unchecked(1);
        while exp > 0 {
            // multiply in the current square if the lowest bit is set
            if exp & 1 != 0 {
                res *= self;
            }
            self *= self;
            exp /= 2;
        }
        res
    }

    /// The raw representative stored in this element
    #[cfg(any(feature = "rand", feature = "num-traits"))]
    pub(crate) const fn repr(self) -> u64 {
        self.0
    }
}

impl<const P: u64, const Q: u32> From<Z32<Q>> for Z64<P> {
    /// Converts a `Z32` element by feeding its `u32` value into the
    /// `u32` conversion of `Z64<P>`
    fn from(z: Z32<Q>) -> Self {
        let raw = u32::from(z);
        Self::from(raw)
    }
}

impl<const P: u64> From<Z64<P>> for u128 {
    /// Widens the stored representative to `u128`
    fn from(i: Z64<P>) -> Self {
        u128::from(i.0)
    }
}

impl<const P: u64> From<Z64<P>> for i128 {
    /// Widens the stored representative to `i128`
    fn from(i: Z64<P>) -> Self {
        i128::from(i.0)
    }
}

impl<const P: u64> From<Z64<P>> for u64 {
    fn from(i: Z64<P>) -> Self {
        i.0
    }
}

impl<const P: u64> From<Z64<P>> for i64 {
    fn from(i: Z64<P>) -> Self {
        i.0 as i64
    }
}

impl<const P: u64> TryFrom<Z64<P>> for u32 {
    type Error = TryFromIntError;

    /// Fails if the stored representative does not fit into `u32`
    fn try_from(i: Z64<P>) -> Result<Self, Self::Error> {
        u32::try_from(i.0)
    }
}

impl<const P: u64> TryFrom<Z64<P>> for i32 {
    type Error = TryFromIntError;

    /// Fails if the stored representative does not fit into `i32`
    fn try_from(i: Z64<P>) -> Result<Self, Self::Error> {
        i32::try_from(i.0)
    }
}

impl<const P: u64> TryFrom<Z64<P>> for u16 {
    type Error = TryFromIntError;

    /// Fails if the stored representative does not fit into `u16`
    fn try_from(i: Z64<P>) -> Result<Self, Self::Error> {
        u16::try_from(i.0)
    }
}

impl<const P: u64> TryFrom<Z64<P>> for i16 {
    type Error = TryFromIntError;

    /// Fails if the stored representative does not fit into `i16`
    fn try_from(i: Z64<P>) -> Result<Self, Self::Error> {
        i16::try_from(i.0)
    }
}

impl<const P: u64> TryFrom<Z64<P>> for u8 {
    type Error = TryFromIntError;

    /// Fails if the stored representative does not fit into `u8`
    fn try_from(i: Z64<P>) -> Result<Self, Self::Error> {
        u8::try_from(i.0)
    }
}

impl<const P: u64> TryFrom<Z64<P>> for i8 {
    type Error = TryFromIntError;

    /// Fails if the stored representative does not fit into `i8`
    fn try_from(i: Z64<P>) -> Result<Self, Self::Error> {
        i8::try_from(i.0)
    }
}

impl<const P: u64> From<u128> for Z64<P> {
    /// Reduces `u` modulo `P` in 128 bit arithmetic, then goes
    /// through the `u64` conversion
    fn from(u: u128) -> Self {
        let remainder = u.rem_euclid(P as u128);
        Self::from(remainder as u64)
    }
}

impl<const P: u64> From<i128> for Z64<P> {
    /// Takes the non-negative remainder of `i` modulo `P` in 128 bit
    /// arithmetic, then goes through the `u64` conversion
    fn from(i: i128) -> Self {
        let remainder = i.rem_euclid(P as i128);
        Self::from(remainder as u64)
    }
}

impl<const P: u64> From<u64> for Z64<P> {
    /// Reduces `u` modulo `P` using the precomputed reduction data
    fn from(u: u64) -> Self {
        let red = Self::info().red_struct;
        let reduced = remu(u, P, red);
        Self::new_unchecked(reduced as u64)
    }
}

impl<const P: u64> From<i64> for Z64<P> {
    fn from(i: i64) -> Self {
        Self::new(i)
    }
}

impl<const P: u64> From<i32> for Z64<P> {
    /// Widens to `i64` and reduces modulo `P`
    fn from(i: i32) -> Self {
        let wide = i64::from(i);
        Self::from(wide)
    }
}

impl<const P: u64> From<u32> for Z64<P> {
    /// Widens to `u64` and reduces modulo `P`
    fn from(u: u32) -> Self {
        let wide = u64::from(u);
        Self::from(wide)
    }
}

impl<const P: u64> From<i16> for Z64<P> {
    /// Widens to `i64` and reduces modulo `P`
    fn from(i: i16) -> Self {
        let wide = i64::from(i);
        Self::from(wide)
    }
}

impl<const P: u64> From<u16> for Z64<P> {
    /// Widens to `u64` and reduces modulo `P`
    fn from(u: u16) -> Self {
        let wide = u64::from(u);
        Self::from(wide)
    }
}

impl<const P: u64> From<i8> for Z64<P> {
    /// Widens to `i64` and reduces modulo `P`
    fn from(i: i8) -> Self {
        let wide = i64::from(i);
        Self::from(wide)
    }
}

impl<const P: u64> From<u8> for Z64<P> {
    /// Widens to `u64` and reduces modulo `P`
    fn from(u: u8) -> Self {
        let wide = u64::from(u);
        Self::from(wide)
    }
}

impl<'a, const P: u64> TryFrom<&'a str> for Z64<P> {
    type Error = ParseIntError;

    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
        s.parse()
    }
}

impl<const P: u64> FromStr for Z64<P> {
    type Err = ParseIntError;

    /// Parses an unsigned integer and accepts it only if it is
    /// already smaller than `P`; no modular reduction is applied.
    ///
    /// # Errors
    ///
    /// Returns the integer parse error, or a `PosOverflow` error if
    /// the parsed value is `P` or larger.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let z: u64 = s.parse()?;
        if z < P {
            // the branch condition guarantees z < P
            Ok(Self::new_unchecked(z))
        } else {
            Err(IntErrorKind::PosOverflow.into())
        }
    }
}

impl<const P: u64> Display for Z64<P> {
    /// Formats the stored representative as a decimal integer.
    ///
    /// Formatting is delegated to the inner `u64`, so formatter
    /// options such as width, fill and zero padding are honoured
    /// instead of being silently dropped.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        Display::fmt(&self.0, f)
    }
}

impl<const P: u64> AddAssign for Z64<P> {
    /// In-place modular addition
    fn add_assign(&mut self, rhs: Self) {
        let sum = *self + rhs;
        *self = sum;
    }
}

impl<const P: u64> AddAssign<&Z64<P>> for Z64<P> {
    /// In-place modular addition of a referenced element
    fn add_assign(&mut self, rhs: &Self) {
        let sum = *self + *rhs;
        *self = sum;
    }
}

impl<const P: u64> SubAssign for Z64<P> {
    /// In-place modular subtraction
    fn sub_assign(&mut self, rhs: Self) {
        let difference = *self - rhs;
        *self = difference;
    }
}

impl<const P: u64> SubAssign<&Z64<P>> for Z64<P> {
    /// In-place modular subtraction of a referenced element
    fn sub_assign(&mut self, rhs: &Self) {
        let difference = *self - *rhs;
        *self = difference;
    }
}

impl<const P: u64> MulAssign for Z64<P> {
    /// In-place modular multiplication
    fn mul_assign(&mut self, rhs: Self) {
        let product = *self * rhs;
        *self = product;
    }
}

impl<const P: u64> MulAssign<&Z64<P>> for Z64<P> {
    /// In-place modular multiplication by a referenced element
    fn mul_assign(&mut self, rhs: &Self) {
        let product = *self * *rhs;
        *self = product;
    }
}

impl<const P: u64> DivAssign for Z64<P> {
    /// In-place division; panics like `/` if `rhs` has no inverse
    fn div_assign(&mut self, rhs: Self) {
        let quotient = *self / rhs;
        *self = quotient;
    }
}

impl<const P: u64> DivAssign<&Z64<P>> for Z64<P> {
    /// In-place division by a referenced element; panics like `/`
    /// if `rhs` has no inverse
    fn div_assign(&mut self, rhs: &Self) {
        let quotient = *self / *rhs;
        *self = quotient;
    }
}

impl<const P: u64> Add for Z64<P> {
    type Output = Self;

    /// Modular addition: add the representatives, then subtract `P`
    /// once if the sum reached it
    fn add(self, rhs: Self) -> Self::Output {
        let raw_sum = self.0 + rhs.0;
        let reduced = correct_excess(raw_sum as i64, P);
        debug_assert!(reduced >= 0);
        Self::new_unchecked(reduced as u64)
    }
}

impl<const P: u64> Add for &Z64<P> {
    type Output = Z64<P>;

    /// Adds the two referenced elements by value
    fn add(self, rhs: Self) -> Self::Output {
        let (lhs, rhs) = (*self, *rhs);
        lhs + rhs
    }
}

impl<const P: u64> Add<Z64<P>> for &Z64<P> {
    type Output = Z64<P>;

    /// Adds an owned element to a referenced one
    fn add(self, rhs: Z64<P>) -> Self::Output {
        let lhs = *self;
        lhs + rhs
    }
}

impl<const P: u64> Add<&Z64<P>> for Z64<P> {
    type Output = Z64<P>;

    /// Adds a referenced element to an owned one
    fn add(self, rhs: &Z64<P>) -> Self::Output {
        let rhs = *rhs;
        self + rhs
    }
}

impl<const P: u64> Sub for Z64<P> {
    type Output = Self;

    /// Modular subtraction: subtract the representatives as signed
    /// integers, then add `P` once if the difference is negative
    fn sub(self, rhs: Self) -> Self::Output {
        let raw_diff = self.0 as i64 - rhs.0 as i64;
        let wrapped = correct_deficit(raw_diff, P);
        debug_assert!(wrapped >= 0);
        Self::new_unchecked(wrapped as u64)
    }
}

impl<const P: u64> Sub for &Z64<P> {
    type Output = Z64<P>;

    /// Subtracts the two referenced elements by value
    fn sub(self, rhs: Self) -> Self::Output {
        let (lhs, rhs) = (*self, *rhs);
        lhs - rhs
    }
}

impl<const P: u64> Sub<Z64<P>> for &Z64<P> {
    type Output = Z64<P>;

    /// Subtracts an owned element from a referenced one
    fn sub(self, rhs: Z64<P>) -> Self::Output {
        let lhs = *self;
        lhs - rhs
    }
}

impl<const P: u64> Sub<&Z64<P>> for Z64<P> {
    type Output = Z64<P>;

    /// Subtracts a referenced element from an owned one
    fn sub(self, rhs: &Z64<P>) -> Self::Output {
        let rhs = *rhs;
        self - rhs
    }
}

impl<const P: u64> Neg for Z64<P> {
    type Output = Self;

    /// Additive inverse, computed as the default element minus `self`
    fn neg(self) -> Self::Output {
        let zero = Self::default();
        zero - self
    }
}

impl<const P: u64> Mul for Z64<P> {
    type Output = Self;

    /// Modular multiplication using the precomputed inverse of `P`
    fn mul(self, rhs: Self) -> Self::Output {
        let p_inv = Self::modulus_inv();
        let product = mul_mod(self.0, rhs.0, P, p_inv);
        Self::new_unchecked(product)
    }
}

impl<const P: u64> Mul for &Z64<P> {
    type Output = Z64<P>;

    /// Multiplies the two referenced elements by value
    fn mul(self, rhs: Self) -> Self::Output {
        let (lhs, rhs) = (*self, *rhs);
        lhs * rhs
    }
}

impl<const P: u64> Mul<Z64<P>> for &Z64<P> {
    type Output = Z64<P>;

    /// Multiplies a referenced element by an owned one
    fn mul(self, rhs: Z64<P>) -> Self::Output {
        let lhs = *self;
        lhs * rhs
    }
}

impl<const P: u64> Mul<&Z64<P>> for Z64<P> {
    type Output = Z64<P>;

    /// Multiplies an owned element by a referenced one
    fn mul(self, rhs: &Z64<P>) -> Self::Output {
        let rhs = *rhs;
        self * rhs
    }
}

impl<const P: u64> Div for Z64<P> {
    type Output = Self;

    /// Division as multiplication by the inverse of `rhs`
    ///
    /// # Panics
    ///
    /// Panics if `rhs` has no multiplicative inverse.
    #[allow(clippy::suspicious_arithmetic_impl)]
    fn div(self, rhs: Self) -> Self::Output {
        let rhs_inverse = rhs.inv();
        self * rhs_inverse
    }
}

const fn mul_mod(a: u64, b: u64, n: u64, ninv: SpInverse64) -> u64 {
    let res = normalised_mul_mod(
        a,
        (b as i64) << ninv.shamt,
        ((n as i64) << ninv.shamt) as u64,
        ninv.inv,
    ) >> ninv.shamt;
    res as u64
}

impl<const P: u64> Div for &Z64<P> {
    type Output = Z64<P>;

    /// Divides the two referenced elements by value
    fn div(self, rhs: Self) -> Self::Output {
        let (lhs, rhs) = (*self, *rhs);
        lhs / rhs
    }
}

impl<const P: u64> Div<Z64<P>> for &Z64<P> {
    type Output = Z64<P>;

    /// Divides a referenced element by an owned one
    fn div(self, rhs: Z64<P>) -> Self::Output {
        let lhs = *self;
        lhs / rhs
    }
}

impl<const P: u64> Div<&Z64<P>> for Z64<P> {
    type Output = Z64<P>;

    /// Divides an owned element by a referenced one
    fn div(self, rhs: &Z64<P>) -> Self::Output {
        let rhs = *rhs;
        self / rhs
    }
}

/// Fallible division
pub trait TryDiv<Rhs = Self> {
    /// Result type of successful division
    type Output;

    /// Tries to divide by the argument.
    ///
    /// `a.try_div(b)` returns `Some(a / b)` if the division
    /// is successful and `None` otherwise.
    #[must_use]
    fn try_div(self, rhs: Rhs) -> Option<Self::Output>;
}

impl<const P: u64> TryDiv for Z64<P> {
    type Output = Self;

    /// Returns `self / rhs`, or `None` if `rhs` has no inverse
    fn try_div(self, rhs: Self) -> Option<Self::Output> {
        let rhs_inverse = rhs.try_inv()?;
        Some(self * rhs_inverse)
    }
}

impl<const P: u64> TryDiv for &Z64<P> {
    type Output = Z64<P>;

    /// Fallible division of the two referenced elements by value
    fn try_div(self, rhs: Self) -> Option<Self::Output> {
        <Z64<P> as TryDiv>::try_div(*self, *rhs)
    }
}

impl<const P: u64> TryDiv<Z64<P>> for &Z64<P> {
    type Output = Z64<P>;

    /// Fallible division of a referenced element by an owned one
    fn try_div(self, rhs: Z64<P>) -> Option<Self::Output> {
        <Z64<P> as TryDiv>::try_div(*self, rhs)
    }
}

impl<const P: u64> TryDiv<&Z64<P>> for Z64<P> {
    type Output = Z64<P>;

    /// Fallible division of an owned element by a referenced one
    fn try_div(self, rhs: &Z64<P>) -> Option<Self::Output> {
        <Z64<P> as TryDiv>::try_div(self, *rhs)
    }
}

impl<const P: u64> Sum for Z64<P> {
    /// Adds up all items, starting from zero
    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
        let mut total = Self::new_unchecked(0);
        for term in iter {
            total = total + term;
        }
        total
    }
}

impl<const P: u64> Product for Z64<P> {
    /// Multiplies all items, starting from one
    fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
        let mut total = Self::new_unchecked(1);
        for factor in iter {
            total = total * factor;
        }
        total
    }
}

/// Remainder of `a * b` modulo the normalised modulus `n`, using
/// the precomputed `ninv`
///
/// A quotient estimate is derived from the high bits of the 128 bit
/// product; the remainder before the final correction is expected
/// to lie below `2 * n` (debug-asserted).
const fn normalised_mul_mod(a: u64, b: i64, n: u64, ninv: u64) -> i64 {
    let product = a as u128 * b as u128;
    let product_high = (product >> (SP_NBITS - 2)) as u64;
    let quot_estimate = u128_mul_high(product_high, ninv) >> POST_SHIFT;
    let product_low = product as u64;
    let rem = product_low.wrapping_sub(quot_estimate.wrapping_mul(n));
    debug_assert!(rem < 2 * n);
    correct_excess(rem as i64, n)
}

/// Remainder of the unsigned `z` modulo `p`, using the quotient
/// estimate `high(z * red.ninv)` followed by one correction step
const fn remu(z: u64, p: u64, red: ReduceStruct) -> i64 {
    let quot_estimate = u128_mul_high(z, red.ninv);
    let rem = z - quot_estimate.wrapping_mul(p);
    correct_excess(rem as i64, p)
}

/// Non-negative remainder of the signed `z` modulo `p`
///
/// The lower 63 bits of `z` are reduced as an unsigned number; for
/// negative `z` the value `red.sgn` is subtracted afterwards and a
/// resulting deficit is corrected by adding `p`.
const fn remi(z: i64, p: u64, red: ReduceStruct) -> i64 {
    const LOW_63_BITS: u64 = u64::MAX >> 1;
    let without_sign_bit = (z as u64) & LOW_63_BITS;
    let partial = remu(without_sign_bit, p, red);
    // the mask is all ones for negative `z` and zero otherwise
    let sign_correction = i64_sign_mask(z) & (red.sgn as i64);
    correct_deficit(partial - sign_correction, p)
}

/// Upper 64 bits of the full 128 bit product `a * b`
const fn u128_mul_high(a: u64, b: u64) -> u64 {
    let wide_product = (a as u128) * (b as u128);
    u128_get_high(wide_product)
}

/// Upper 64 bits of a 128 bit integer
const fn u128_get_high(u: u128) -> u64 {
    let upper = u >> u64::BITS;
    upper as u64
}

/// Computes `a - p`, adding `p` back if that difference is negative
///
/// The correction is applied through a sign mask rather than a
/// branch.
const fn correct_excess(a: i64, p: u64) -> i64 {
    let modulus = p as i64;
    let lowered = a - modulus;
    let restore = i64_sign_mask(lowered) & modulus;
    lowered + restore
}

/// Adds `p` to `a` if `a` is negative, using a sign mask rather
/// than a branch
const fn correct_deficit(a: i64, p: u64) -> i64 {
    let modulus = p as i64;
    let lift = i64_sign_mask(a) & modulus;
    a + lift
}

/// Output of `extended_gcd`
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
struct ExtendedGCDResult {
    /// Greatest common divisor of the two inputs
    gcd: u64,
    /// Coefficients `[s, t]` with `a * s + b * t == gcd`
    bezout: [i64; 2],
}

/// Extended Euclidean algorithm for `a` and `b`
///
/// Returns the greatest common divisor together with the Bézout
/// coefficients of `a` and `b`.
const fn extended_gcd(a: u64, b: u64) -> ExtendedGCDResult {
    let (mut prev_rem, mut rem) = (a, b);
    let (mut prev_s, mut cur_s): (i64, i64) = (1, 0);
    let (mut prev_t, mut cur_t): (i64, i64) = (0, 1);

    while rem != 0 {
        let q = prev_rem / rem;

        let next_rem = prev_rem - q * rem;
        prev_rem = rem;
        rem = next_rem;

        let next_s = prev_s - q as i64 * cur_s;
        prev_s = cur_s;
        cur_s = next_s;

        let next_t = prev_t - q as i64 * cur_t;
        prev_t = cur_t;
        cur_t = next_t;
    }

    ExtendedGCDResult {
        gcd: prev_rem,
        bezout: [prev_s, prev_t],
    }
}

/// Greatest common divisor of `a` and `b` (Euclidean algorithm)
const fn gcd(mut a: u64, mut b: u64) -> u64 {
    while b != 0 {
        let rem = a % b;
        a = b;
        b = rem;
    }
    a
}

// Maximum number of bits a modulus may use (62); enforced by the
// assertion in `Z64Info::new`.
const SP_NBITS: u32 = u64::BITS - 2;
// Shift constants of the precomputed-inverse multiplication.
// NOTE(review): the names presumably mirror NTL's NTL_PRE_SHIFT2 /
// NTL_POST_SHIFT - confirm against the NTL sources.
const PRE_SHIFT2: u32 = 2 * SP_NBITS + 1;
const POST_SHIFT: u32 = 1;

/// Number of bits needed to represent `z`, i.e. the position of its
/// highest set bit (0 for `z == 0`)
const fn used_bits(z: u64) -> u32 {
    let unused = z.leading_zeros();
    u64::BITS - unused
}

/// Per-modulus data precomputed by `Z64Info::new`
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
struct Z64Info {
    /// The modulus this data was computed for
    p: u64,
    /// Result of `prep_mul_mod(p)`, used for multiplication
    p_inv: SpInverse64,
    /// Result of `prep_rem(p)`, used for reducing integers
    red_struct: ReduceStruct,
}

impl Z64Info {
    /// Precomputes the multiplication and reduction data for `p`
    ///
    /// # Panics
    ///
    /// Panics if `p <= 1` or if `p` needs more than `SP_NBITS` bits.
    const fn new(p: u64) -> Self {
        assert!(p > 1);
        assert!(used_bits(p) <= SP_NBITS);

        Self {
            p,
            p_inv: prep_mul_mod(p),
            red_struct: prep_rem(p),
        }
    }
}

/// Precomputes the `SpInverse64` for the modulus `p`
///
/// `p` is shifted left until it uses exactly `SP_NBITS` bits; the
/// shift amount is stored alongside the inverse of the shifted
/// modulus.
const fn prep_mul_mod(p: u64) -> SpInverse64 {
    let spare_bits = u64::BITS - SP_NBITS;
    let shamt = p.leading_zeros() - spare_bits;
    SpInverse64 {
        inv: normalised_prep_mul_mod(p << shamt),
        shamt,
    }
}

/// Data precomputed by `prep_rem` for reducing integers modulo `p`
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
struct ReduceStruct {
    /// Quotient estimate factor, built from `2^63 / p`
    ninv: u64,
    /// `2^63 % p`, subtracted by `remi` for negative inputs
    sgn: u64,
}

/// Precomputes the reduction data for the modulus `p`
const fn prep_rem(p: u64) -> ReduceStruct {
    const TWO_POW_63: u64 = 1 << (u64::BITS - 1);

    let half_quot = TWO_POW_63 / p;
    // r = 2^63 % p
    let r = TWO_POW_63 - half_quot * p;

    // double the quotient; the doubled remainder contributes one
    // more if it reaches `p`
    let mut ninv = half_quot;
    ninv *= 2;
    let (carry, _) = correct_excess_quo(2 * r as i64, p as i64);
    ninv += carry;

    ReduceStruct { ninv, sgn: r }
}

/// Returns `(1, a - n)` if `a >= n` and `(0, a)` otherwise
const fn correct_excess_quo(a: i64, n: i64) -> (u64, i64) {
    if a < n {
        return (0, a);
    }
    (1, a - n)
}

/// All ones (`-1`) if `i` is negative, zero otherwise
const fn i64_sign_mask(i: i64) -> i64 {
    const SIGN_BIT_POS: u32 = u64::BITS - 1;
    // the arithmetic shift replicates the sign bit into every position
    i >> SIGN_BIT_POS
}

/// All ones (`-1`) if the highest bit of `i` is set, zero otherwise
const fn u64_sign_mask(i: u64) -> i64 {
    let reinterpreted = i as i64;
    i64_sign_mask(reinterpreted)
}

/// Precomputed data for multiplication modulo a fixed modulus, as
/// produced by `prep_mul_mod`
#[allow(missing_docs)]
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct SpInverse64 {
    // value returned by `normalised_prep_mul_mod` for the shifted modulus
    inv: u64,
    // left shift that makes the modulus use exactly `SP_NBITS` bits
    shamt: u32,
}

// Adapted from NTL's sp_NormalizedPrepMulMod
//
// Computes the inverse-like constant stored in `SpInverse64::inv`
// for a modulus `n` that has already been shifted to use exactly
// `SP_NBITS` bits (see `prep_mul_mod`).
//
// Floating-point arithmetic replaced by u128 / i128 to allow `const`.
// The performance impact is not a huge concern since this function
// is only evaluated at compile time and only once for each prime field order.
// This is unlike NTL, where each change triggers a recalculation?
//
// This only works since this function is `const` and can therefore
// be used to compute individual `const INFO` inside `Z64<P>` for each
// `P`. The alternatives `lazy_static!` or `OnceCell` would not be
// recomputed, but instead incorrectly shared between `Z64<P>` with
// different `P`!
const fn normalised_prep_mul_mod(n: u64) -> u64 {
    // NOTE: this is an initial approximation
    //       the true quotient is <= 2^SP_NBITS
    const MAX: u128 = 1u128 << (2 * SP_NBITS - 1);
    let init_quot_approx = MAX / n as u128;

    // remainder left over by the initial quotient approximation
    let approx_rem = MAX - n as u128 * init_quot_approx;

    let approx_rem = (approx_rem << (PRE_SHIFT2 - 2 * SP_NBITS + 1)) - 1;

    // split the remainder into a low word and a high word; the top
    // bit of the low word is carried into the high word because the
    // low word is reinterpreted as a signed integer below
    let approx_rem_low = approx_rem as u64;
    let s1 = (approx_rem >> u64::BITS) as u64;
    let s2 = approx_rem_low >> (u64::BITS - 1);
    let approx_rem_high = s1.wrapping_add(s2);

    let approx_rem_low = approx_rem_low as i64;
    let approx_rem_high = approx_rem_high as i64;

    // 2^64, the weight of the high word
    let bpl = 1i128 << u64::BITS;

    let fr = approx_rem_low as i128 + approx_rem_high as i128 * bpl;

    // now convert fr*ninv to a long
    // but we have to be careful: fr may be negative.
    // the result should still give floor(r/n) pm 1,
    // and is computed in a way that avoids branching

    let mut q1 = (fr / n as i128) as i64;
    if q1 < 0 {
        // This counteracts the round-to-zero behavior of conversion
        // to i64.  It should be compiled into branch-free code.
        q1 -= 1
    }

    let mut q1 = q1 as u64;
    let approx_rem_low = approx_rem_low as u64;
    let sub = q1.wrapping_mul(n);

    let approx_rem = approx_rem_low.wrapping_sub(sub);

    // final adjustment of q1 by -1, 0 or +1, selected through the
    // sign masks of the remainder and of the remainder minus n
    q1 += (1
        + u64_sign_mask(approx_rem)
        + u64_sign_mask(approx_rem.wrapping_sub(n))) as u64;

    ((init_quot_approx as u64) << (PRE_SHIFT2 - 2 * SP_NBITS + 1))
        .wrapping_add(q1)

    // NTL_PRE_SHIFT1 is 0, so no further shift required
}

/// [Z64] variant for fast repeated multiplication
///
/// Use this variant to speed up repeated multiplication by the same value:
/// ```
/// # use ffnt::{Z64, z64::Z64FastMul};
/// const P: u64 = 10007;
/// let mut numbers = Vec::from_iter((1..1000).map(Z64::<P>::from));
///
/// let factor: Z64FastMul<P> = 12.into();
/// for number in &mut numbers {
///     // same as `number *= Z64::from(12)`, but faster
///     *number *= &factor;
/// }
/// ```
///
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Z64FastMul<const P: u64> {
    // the field element itself
    val: Z64<P>,
    // value precomputed from `val` by `prep_mul_mod_precon` and
    // handed to `mul_mod_precon` to estimate the quotient
    val_over_mod_approx: u64,
}

impl<const P: u64> From<Z64<P>> for Z64FastMul<P> {
    /// Wraps a field element together with the value precomputed
    /// for fast multiplication by it
    fn from(val: Z64<P>) -> Self {
        Self {
            val_over_mod_approx: Self::prep_mul_mod_precon(val.0),
            val,
        }
    }
}

impl<const P: u64> Z64FastMul<P> {
    fn prep_mul_mod_precon(val: u64) -> u64 {
        let p_inv = Z64::<P>::INFO.p_inv;
        normalized_prep_mul_mod_precon(
            val << p_inv.shamt,
            P << p_inv.shamt,
            p_inv.inv,
        ) << (u64::BITS - SP_NBITS)
    }
}

/// Quotient factor for multiplication by `val` modulo the
/// normalised modulus `p`, using the precomputed `p_inv`
fn normalized_prep_mul_mod_precon(val: u64, p: u64, p_inv: u64) -> u64 {
    let high = val << 2;
    let quot = u128_mul_high(high, p_inv) >> POST_SHIFT;
    let low = val << SP_NBITS;
    let rem = low.wrapping_sub(quot.wrapping_mul(p)); // rem in [0..2*p)
    debug_assert!(rem < 2 * p);

    // one if rem >= p, zero otherwise
    let bump = 1 + i64_sign_mask(rem as i64 - p as i64);
    quot.saturating_add_signed(bump) // NOTE: not shifted
}

impl<const P: u64> Mul<Z64FastMul<P>> for Z64<P> {
    type Output = Z64<P>;

    fn mul(self, rhs: Z64FastMul<P>) -> Self::Output {
        let res = mul_mod_precon(self.0, rhs.val.0, P, rhs.val_over_mod_approx);
        Z64::new_unchecked(res as u64)
    }
}

impl<const P: u64> Mul<Z64<P>> for Z64FastMul<P> {
    type Output = Z64<P>;

    /// Same as `rhs * self` with the operands swapped
    fn mul(self, rhs: Z64<P>) -> Self::Output {
        let (element, factor) = (rhs, self);
        element * factor
    }
}

impl<const P: u64> Mul<Z64FastMul<P>> for &Z64<P> {
    type Output = Z64<P>;

    /// Multiplies a referenced element by an owned fast factor
    fn mul(self, rhs: Z64FastMul<P>) -> Self::Output {
        let element = *self;
        element * rhs
    }
}

impl<const P: u64> Mul<Z64<P>> for &Z64FastMul<P> {
    type Output = Z64<P>;

    /// Multiplies a referenced fast factor by an owned element
    fn mul(self, rhs: Z64<P>) -> Self::Output {
        let factor = *self;
        factor * rhs
    }
}

impl<const P: u64> Mul<&Z64FastMul<P>> for Z64<P> {
    type Output = Z64<P>;

    /// Multiplies an owned element by a referenced fast factor
    fn mul(self, rhs: &Z64FastMul<P>) -> Self::Output {
        let factor = *rhs;
        self * factor
    }
}

impl<const P: u64> Mul<&Z64<P>> for Z64FastMul<P> {
    type Output = Z64<P>;

    /// Multiplies an owned fast factor by a referenced element
    fn mul(self, rhs: &Z64<P>) -> Self::Output {
        let element = *rhs;
        self * element
    }
}

impl<'a, const P: u64> Mul<&'a Z64FastMul<P>> for &Z64<P> {
    type Output = Z64<P>;

    /// Multiplies a referenced element by a referenced fast factor
    fn mul(self, rhs: &'a Z64FastMul<P>) -> Self::Output {
        let (element, factor) = (*self, *rhs);
        element * factor
    }
}

impl<'a, const P: u64> Mul<&'a Z64<P>> for &Z64FastMul<P> {
    type Output = Z64<P>;

    /// Multiplies a referenced fast factor by a referenced element
    fn mul(self, rhs: &'a Z64<P>) -> Self::Output {
        let (factor, element) = (*self, *rhs);
        factor * element
    }
}

impl<const P: u64> MulAssign<Z64FastMul<P>> for Z64<P> {
    /// In-place multiplication by a fast factor
    fn mul_assign(&mut self, rhs: Z64FastMul<P>) {
        let product = *self * rhs;
        *self = product;
    }
}

impl<const P: u64> MulAssign<&Z64FastMul<P>> for Z64<P> {
    /// In-place multiplication by a referenced fast factor
    fn mul_assign(&mut self, rhs: &Z64FastMul<P>) {
        let product = *self * rhs;
        *self = product;
    }
}

/// Computes `lhs * rhs` modulo `p` with the quotient estimated from
/// the precomputed `rhs_over_mod_approx`
///
/// Product and quotient-times-modulus are formed with wrapping
/// 64 bit arithmetic; their difference is corrected once by
/// `correct_excess`.
fn mul_mod_precon(lhs: u64, rhs: u64, p: u64, rhs_over_mod_approx: u64) -> i64 {
    let quot_estimate = u128_mul_high(lhs, rhs_over_mod_approx);
    let wrapped_product = lhs.wrapping_mul(rhs);
    let rem = wrapped_product.wrapping_sub(quot_estimate.wrapping_mul(p));
    correct_excess(rem as i64, p)
}

macro_rules! impl_fastmul_from {
    ( $( $t:ty ),* ) => {
        $(
            impl<const P: u64> From<$t> for Z64FastMul<P> {
                fn from(t: $t) -> Self {
                    Self::from(Z64::from(t))
                }
            }
        )*
    }
}

impl_fastmul_from!(i8, i16, i32, i64, i128, u8, u16, u32, u64, u128);

impl<const P: u64> From<Z64FastMul<P>> for Z64<P> {
    fn from(z: Z64FastMul<P>) -> Self {
        z.val
    }
}

// Unit tests for `Z64` and `Z64FastMul`. The arithmetic tests compare
// against 128 bit integer arithmetic (or `rug` big integers for
// powers) on a fixed pseudo-random sample.
#[cfg(test)]
mod tests {

    use ::rand::{Rng, SeedableRng};
    use once_cell::sync::Lazy;
    use rug::{Integer, ops::Pow};

    use super::*;

    // Moduli used by the arithmetic tests: a tiny one and two large ones
    const PRIMES: [u64; 3] = [3, 443619635352171979, 1152921504606846883];

    // Modulo 6 only 1 and 5 are coprime to the modulus
    #[test]
    fn z64_has_inv() {
        type Z = Z64<6>;
        assert!(!Z::from(0).has_inv());
        assert!(Z::from(1).has_inv());
        assert!(!Z::from(2).has_inv());
        assert!(!Z::from(3).has_inv());
        assert!(!Z::from(4).has_inv());
        assert!(Z::from(5).has_inv());
        assert_eq!(Z::from(6), Z::from(0));
    }

    // Inverting zero must panic
    #[test]
    #[should_panic]
    fn z64_inv0() {
        type Z = Z64<6>;
        Z::from(0).inv();
    }

    // Inverting an element that shares a factor with the modulus must panic
    #[test]
    #[should_panic]
    fn z64_inv2() {
        type Z = Z64<6>;
        Z::from(2).inv();
    }

    // Construction from signed and unsigned integers reduces modulo 3
    #[test]
    fn z64_constr() {
        let z: Z64<3> = 2.into();
        assert_eq!(u64::from(z), 2);
        let z: Z64<3> = (-1).into();
        assert_eq!(u64::from(z), 2);
        let z: Z64<3> = 5.into();
        assert_eq!(u64::from(z), 2);

        let z: Z64<3> = 0.into();
        assert_eq!(u64::from(z), 0);
        let z: Z64<3> = 3.into();
        assert_eq!(u64::from(z), 0);

        let z: Z64<3> = 2u32.into();
        assert_eq!(u64::from(z), 2);
        let z: Z64<3> = 5u32.into();
        assert_eq!(u64::from(z), 2);

        let z: Z64<3> = 0u32.into();
        assert_eq!(u64::from(z), 0);
        let z: Z64<3> = 3u32.into();
        assert_eq!(u64::from(z), 0);
    }

    // 1000 pseudo-random `i64` values from a fixed seed, shared by
    // the tests below
    static POINTS: Lazy<[i64; 1000]> = Lazy::new(|| {
        let mut pts = [0; 1000];
        let mut rng = rand_xoshiro::Xoshiro256StarStar::seed_from_u64(0);
        for pt in &mut pts {
            *pt = rng.random();
        }
        pts
    });

    // Round trip i64 -> Z64 -> i64 agrees with `rem_euclid`
    #[test]
    fn tst_conv() {
        for pt in *POINTS {
            let z: Z64<{ PRIMES[0] }> = pt.into();
            let z: i64 = z.into();
            assert_eq!(z, pt.rem_euclid(PRIMES[0] as i64));
        }

        for pt in *POINTS {
            let z: Z64<{ PRIMES[1] }> = pt.into();
            let z: i64 = z.into();
            assert_eq!(z, pt.rem_euclid(PRIMES[1] as i64));
        }

        for pt in *POINTS {
            let z: Z64<{ PRIMES[2] }> = pt.into();
            let z: i64 = z.into();
            assert_eq!(z, pt.rem_euclid(PRIMES[2] as i64));
        }
    }

    // Addition agrees with 128 bit addition followed by `rem_euclid`
    #[test]
    fn tst_add() {
        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[0] }> = pt1.into();
            let pt1 = pt1 as i128;
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[0] }> = pt2.into();
                let pt2 = pt2 as i128;
                let sum1: i64 = (z1 + z2).into();
                let sum2 = (pt1 + pt2).rem_euclid(PRIMES[0] as i128) as i64;
                assert_eq!(sum1, sum2);
            }
        }

        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[1] }> = pt1.into();
            let pt1 = pt1 as i128;
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[1] }> = pt2.into();
                let pt2 = pt2 as i128;
                let sum1: i64 = (z1 + z2).into();
                let sum2 = (pt1 + pt2).rem_euclid(PRIMES[1] as i128) as i64;
                assert_eq!(sum1, sum2);
            }
        }

        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[2] }> = pt1.into();
            let pt1 = pt1 as i128;
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[2] }> = pt2.into();
                let pt2 = pt2 as i128;
                let sum1: i64 = (z1 + z2).into();
                let sum2 = (pt1 + pt2).rem_euclid(PRIMES[2] as i128) as i64;
                assert_eq!(sum1, sum2);
            }
        }
    }

    // Subtraction agrees with 128 bit subtraction followed by `rem_euclid`
    #[test]
    fn tst_sub() {
        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[0] }> = pt1.into();
            let pt1 = pt1 as i128;
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[0] }> = pt2.into();
                let pt2 = pt2 as i128;
                let sum1: i64 = (z1 - z2).into();
                let sum2 = (pt1 - pt2).rem_euclid(PRIMES[0] as i128) as i64;
                assert_eq!(sum1, sum2);
            }
        }

        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[1] }> = pt1.into();
            let pt1 = pt1 as i128;
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[1] }> = pt2.into();
                let pt2 = pt2 as i128;
                let sum1: i64 = (z1 - z2).into();
                let sum2 = (pt1 - pt2).rem_euclid(PRIMES[1] as i128) as i64;
                assert_eq!(sum1, sum2);
            }
        }

        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[2] }> = pt1.into();
            let pt1 = pt1 as i128;
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[2] }> = pt2.into();
                let pt2 = pt2 as i128;
                let sum1: i64 = (z1 - z2).into();
                let sum2 = (pt1 - pt2).rem_euclid(PRIMES[2] as i128) as i64;
                assert_eq!(sum1, sum2);
            }
        }
    }

    // Multiplication agrees with 128 bit multiplication followed by
    // `rem_euclid`
    #[test]
    fn tst_mul() {
        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[0] }> = pt1.into();
            let pt1 = pt1 as i128;
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[0] }> = pt2.into();
                let pt2 = pt2 as i128;
                let prod1: i64 = (z1 * z2).into();
                let prod2 = (pt1 * pt2).rem_euclid(PRIMES[0] as i128) as i64;
                assert_eq!(prod1, prod2);
            }
        }

        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[1] }> = pt1.into();
            let pt1 = pt1 as i128;
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[1] }> = pt2.into();
                let pt2 = pt2 as i128;
                let prod1: i64 = (z1 * z2).into();
                let prod2 = (pt1 * pt2).rem_euclid(PRIMES[1] as i128) as i64;
                assert_eq!(prod1, prod2);
            }
        }

        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[2] }> = pt1.into();
            let pt1 = pt1 as i128;
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[2] }> = pt2.into();
                let pt2 = pt2 as i128;
                let prod1: i64 = (z1 * z2).into();
                let prod2 = (pt1 * pt2).rem_euclid(PRIMES[2] as i128) as i64;
                assert_eq!(prod1, prod2);
            }
        }
    }

    // `Z64FastMul` multiplication gives the same result as plain
    // `Z64` multiplication
    #[test]
    fn tst_fastmul() {
        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[0] }> = pt1.into();
            let fast_z1 = Z64FastMul::from(z1);
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[0] }> = pt2.into();
                assert_eq!(z1 * z2, fast_z1 * z2);
            }
        }

        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[1] }> = pt1.into();
            let fast_z1 = Z64FastMul::from(z1);
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[1] }> = pt2.into();
                assert_eq!(z1 * z2, fast_z1 * z2);
            }
        }

        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[2] }> = pt1.into();
            let fast_z1 = Z64FastMul::from(z1);
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[2] }> = pt2.into();
                assert_eq!(z1 * z2, fast_z1 * z2);
            }
        }
    }

    // Division is the inverse of multiplication; zero divisors are skipped
    #[test]
    fn tst_div() {
        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[0] }> = pt1.into();
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[0] }> = pt2.into();
                if i64::from(z2) == 0 {
                    continue;
                }
                let div = z1 / z2;
                assert_eq!(z1, div * z2);
            }
        }

        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[1] }> = pt1.into();
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[1] }> = pt2.into();
                if i64::from(z2) == 0 {
                    continue;
                }
                let div = z1 / z2;
                assert_eq!(z1, div * z2);
            }
        }

        for pt1 in *POINTS {
            let z1: Z64<{ PRIMES[2] }> = pt1.into();
            for pt2 in *POINTS {
                let z2: Z64<{ PRIMES[2] }> = pt2.into();
                if i64::from(z2) == 0 {
                    continue;
                }
                let div = z1 / z2;
                assert_eq!(z1, div * z2);
            }
        }
    }

    // `powu` agrees with big-integer exponentiation for random `u8`
    // exponents
    #[test]
    fn tst_pow() {
        let mut rng = rand_xoshiro::Xoshiro256StarStar::seed_from_u64(2849);
        for pt1 in *POINTS {
            let base = Integer::from(pt1);
            for _ in 0..100 {
                let exp: u8 = rng.random();
                let pow = base.clone().pow(exp as u32);
                // ensure remainder is positive and less than the mod
                let ref_pow0 =
                    (pow.clone() % PRIMES[0] + PRIMES[0]) % PRIMES[0];
                let ref_pow0: u64 = ref_pow0.try_into().unwrap();
                let z: Z64<{ PRIMES[0] }> = pt1.into();
                let pow0: u64 = z.powu(exp as u64).into();
                assert_eq!(pow0, ref_pow0);

                let ref_pow0 =
                    (pow.clone() % PRIMES[1] + PRIMES[1]) % PRIMES[1];
                let ref_pow0: u64 = ref_pow0.try_into().unwrap();
                let z: Z64<{ PRIMES[1] }> = pt1.into();
                let pow0: u64 = z.powu(exp as u64).into();
                assert_eq!(pow0, ref_pow0);

                let ref_pow0 = (pow % PRIMES[2] + PRIMES[2]) % PRIMES[2];
                let ref_pow0: u64 = ref_pow0.try_into().unwrap();
                let z: Z64<{ PRIMES[2] }> = pt1.into();
                let pow0: u64 = z.powu(exp as u64).into();
                assert_eq!(pow0, ref_pow0);
            }
        }
    }

    // Conversion from a `u64` above `i64::MAX` reduces correctly
    #[test]
    fn tst_from() {
        const P: u64 = 1152921504606846883;
        let z = 9723086427719839101u64;
        assert_eq!(u64::from(Z64::<P>::from(z)), 499714390865064037u64);
    }
}