pxfm 0.1.28

Fast and accurate math
Documentation
/*
 * // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::{dd_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::exponents::{EXP_REDUCE_T0, EXP_REDUCE_T1};
use crate::rounding::CpuRound;

#[inline(always)]
fn exp_poly(z: f64) -> DoubleDouble {
    /* The following is a degree-4 polynomial generated by Sollya for exp(x)
    over [-2^-12.905,2^-12.905]
    with absolute error < 2^-74.34 (see sollya/Q_1.sollya). */
    const Q_1: [u64; 5] = [
        0x3ff0000000000000,
        0x3ff0000000000000,
        0x3fe0000000000000,
        0x3fc5555555997996,
        0x3fa5555555849d8d,
    ];
    let mut q = dd_fmla(f64::from_bits(Q_1[4]), z, f64::from_bits(Q_1[3]));
    q = dd_fmla(q, z, f64::from_bits(Q_1[2]));
    let h0 = dd_fmla(q, z, f64::from_bits(Q_1[1]));

    let v1 = DoubleDouble::from_exact_mult(z, h0);
    DoubleDouble::f64_add(f64::from_bits(Q_1[0]), v1)
}

#[inline]
pub(crate) fn i0_exp(r: f64) -> DoubleDouble {
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);

    let k = (r * INVLOG2).cpu_round();

    const LOG_2E: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3d0718432a1b0e26),
        f64::from_bits(0x3f262e42ff000000),
    );

    let zh = f_fmla(LOG_2E.lo, k, f_fmla(-LOG_2E.hi, k, r));

    let bk = unsafe {
        k.to_int_unchecked::<i64>() // k is already integer, this is just a conversion
    };
    let mk = (bk >> 12) + 0x3ff;
    let i2 = (bk >> 6) & 0x3f;
    let i1 = bk & 0x3f;

    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i2 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
    let mut de = DoubleDouble::quick_mult(t1, t0);
    let q = exp_poly(zh);
    de = DoubleDouble::quick_mult(de, q);

    let mut du = (mk as u64).wrapping_shl(52);
    du = f64::from_bits(du).to_bits();
    DoubleDouble::quick_mult_f64(de, f64::from_bits(du))
}

#[inline(always)]
fn exp_poly_dd(z: DoubleDouble) -> DoubleDouble {
    // Generated by Sollya:
    // d = [-2^-12.905,2^-12.905];
    // f = exp(x);
    // w = 1;
    // p = remez(f, 6, d, w);
    // pf = fpminimax(f, [|0,1,2,3,4,5,6|], [|1, 107...|], d, absolute, floating, 0, p);
    // err_p = -log2(dirtyinfnorm(pf*w-f, d));
    // display = decimal;
    const Q_1: [(u64, u64); 7] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x3a20e40000000000, 0x3ff0000000000000),
        (0x3a04820000000000, 0x3fe0000000000000),
        (0xbc756423c5338a66, 0x3fc5555555555556),
        (0xbc5560f74db5556c, 0x3fa5555555555556),
        (0x3c3648eca89bc6ac, 0x3f8111111144fbee),
        (0xbbd53d924ae90c8c, 0x3f56c16c16ffeecc),
    ];
    let mut p = DoubleDouble::quick_mul_add(
        z,
        DoubleDouble::from_bit_pair(Q_1[6]),
        DoubleDouble::from_bit_pair(Q_1[5]),
    );
    p = DoubleDouble::quick_mul_add(z, p, DoubleDouble::from_bit_pair(Q_1[4]));
    p = DoubleDouble::quick_mul_add(z, p, DoubleDouble::from_bit_pair(Q_1[3]));
    p = DoubleDouble::quick_mul_add(z, p, DoubleDouble::from_bit_pair(Q_1[2]));
    p = DoubleDouble::quick_mul_add(z, p, DoubleDouble::from_bit_pair(Q_1[1]));
    DoubleDouble::quick_mul_add_f64(z, p, f64::from_bits(0x3ff0000000000000))
}

#[cold]
pub(crate) fn i0_exp_accurate(r: f64) -> DoubleDouble {
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);

    let k = (r * INVLOG2).cpu_round();

    const L2: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3d0718432a1b0e26),
        f64::from_bits(0x3f262e42ff000000),
    );
    const L2LL: f64 = f64::from_bits(0x3999ff0342542fc3);
    let dx = f_fmla(-L2.hi, k, r);
    let dx_dd = DoubleDouble::quick_mult_f64(DoubleDouble::new(L2LL, L2.lo), k);
    let dz = DoubleDouble::full_add_f64(dx_dd, dx);

    let bk = unsafe {
        k.to_int_unchecked::<i64>() // k is already integer, this is just a conversion
    };
    let mk = (bk >> 12) + 0x3ff;
    let i2 = (bk >> 6) & 0x3f;
    let i1 = bk & 0x3f;

    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i2 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
    let mut de = DoubleDouble::quick_mult(t1, t0);
    let q = exp_poly_dd(dz);
    de = DoubleDouble::quick_mult(de, q);

    let mut du = (mk as u64).wrapping_shl(52);
    du = f64::from_bits(du).to_bits();
    DoubleDouble::quick_mult_f64(de, f64::from_bits(du))
}

#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_i0() {
        assert_eq!(i0_exp(0.5).to_f64(), 1.6487212707001282);
        assert_eq!(i0_exp_accurate(0.5).to_f64(), 1.6487212707001282);
    }
}