pxfm 0.1.29

Fast and accurate math
Documentation
/*
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::logs::log1p_dyadic_tables::{LOG1P_R1, LOG1P_R2, LOG1P_S2, LOG1P_S3, LOGP1_R3};

#[cold]
pub(crate) fn log1p_accurate(e_x: i32, index: usize, m_x: DoubleDouble) -> DyadicFloat128 {
    // Minimax polynomial generated by Sollya with:
    // > P = fpminimax((log(1 + x) - x)/x^2, 3, [|1, 128...|],
    //                 [-0x1.01928p-22 , 0x1p-22]);
    // > P;
    // > dirtyinfnorm(log(1 + x)/x - 1 - x*P, [-0x1.01928p-22 , 0x1p-22]);
    // 0x1.ce1e...p-116
    const BIG_COEFFS: [DyadicFloat128; 4] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -130,
            mantissa: 0xccccccd7_4818e397_7ed78465_d460315b_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -129,
            mantissa: 0x80000000_000478b0_c6388a23_871ce156_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -129,
            mantissa: 0xaaaaaaaa_aaaaaaaa_aa807bd8_67763262_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -128,
            mantissa: 0x80000000_00000000_00000000_00000000_u128,
        },
    ];

    // log(2) with 128-bit precision generated by SageMath with:
    // def format_hex(value):
    //     l = hex(value)[2:]
    //     n = 4
    //     x = [l[i:i + n] for i in range(0, len(l), n)]
    //     return "0x" + "_".join(x) + "_u128"
    // (s, m, e) = RealField(128)(2).log().sign_mantissa_exponent();
    // print(format_hex(m));
    const LOG_2: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -128,
        mantissa: 0xb172_17f7_d1cf_79ab_c9e3_b398_03f2_f6af_u128,
    };

    let e_x_f128 = DyadicFloat128::new_from_f64(e_x as f64);
    let mut sum = LOG_2 * e_x_f128;
    sum = sum + LOG1P_R1[index];

    let mut v = DyadicFloat128::new_from_f64(m_x.hi) + DyadicFloat128::new_from_f64(m_x.lo);

    // Skip 2nd range reduction step if |m_x| <= 2^-15.
    if m_x.hi > f64::from_bits(0x3f00000000000000) || m_x.hi < f64::from_bits(0xbf00000000000000) {
        // Range reduction - Step 2.
        // For k such that: k * 2^-14 - 2^-15 <= m_x.hi < k * 2^-14 + 2^-15,
        // Let s_k = 2^-18 * round( 2^18 / (1 + k*2^-14) ) - 1
        // Then the 2nd reduced argument is:
        //    (1 + s_k) * (1 + m_x) - 1 =
        //  = s_k + m_x + s_k * m_x
        // Output range:
        //   -0x1.1037c00000040271p-15 <= v2.hi + v2.lo <= 0x1.108480000008096cp-15
        let idx2: i32 = (f64::from_bits(0x40d0000000000000)
            * (m_x.hi
                + (91. * f64::from_bits(0x3f10000000000000) + f64::from_bits(0x3f00000000000000))))
            as i32;
        sum = sum + LOG1P_R2[idx2 as usize];
        let s2 = DyadicFloat128::new_from_f64(f64::from_bits(LOG1P_S2[idx2 as usize]));
        v = (v + s2) + (v * s2);
    }

    // Skip 3rd range reduction step if |v| <= 2^-22.
    if v.exponent > -150 {
        // Range reduction - Step 3.
        // For k such that: k * 2^-21 - 2^-22 <= v2.hi < k * 2^-21 + 2^-22,
        // Let s_k = 2^-21 * round( 2^21 / (1 + k*2^-21) ) - 1
        // Then the 3rd reduced argument is:
        //    v3.hi + v3.lo ~ (1 + s_k) * (1 + v2.hi + v2.lo) - 1
        // Output range:
        //   -0x1.012bb800000800114p-22 <= v3.hi + v3.lo <= 0x1p-22
        let zv = v.fast_as_f64();
        let idx3 = (f64::from_bits(0x4140000000000000)
            * (zv
                + (69. * f64::from_bits(0x3ea0000000000000) + f64::from_bits(0x3e90000000000000))))
            as i32;
        sum = sum + LOGP1_R3[idx3 as usize];
        let s3 = DyadicFloat128::new_from_f64(f64::from_bits(LOG1P_S3[idx3 as usize]));
        v = (v + s3) + (v * s3);
    }

    let mut p = v * BIG_COEFFS[0];
    p = v * (p + BIG_COEFFS[1]);
    p = v * (p + BIG_COEFFS[2]);
    p = v * (p + BIG_COEFFS[3]);
    p = v + (v * p);

    sum + p
}