pxfm 0.1.29

Fast and accurate math
Documentation
/*
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::f_fmla;
use crate::logs::LOG_RANGE_REDUCTION;
use crate::polyeval::{f_estrin_polyeval7, f_polyeval6};

// Generated by SageMath:
// print("[")
// for i in range(128):
//     R = RealField(200)
//     r = R(2)**(-8) * ( R(2)**8 * (R(1) - R(2)**(-8)) / (R(1) + R(i)*R(2)**-7) ).ceil()
//
//     if i == 0 or i == 127:
//         print(double_to_hex(0), ",")
//     else:
//         print(double_to_hex(-r.log10()), ",")
// print("];")
static LOG10_D: [u64; 128] = [
    0x0000000000000000,
    0x3f6be76bd77b4fc3,
    0x3f7c03a80ae5e054,
    0x3f851824c7587eb0,
    0x3f8c3d0837784c41,
    0x3f91b85d6044e9ae,
    0x3f9559bd2406c3ba,
    0x3f9902c31d62a843,
    0x3f9cb38fccd8bfdb,
    0x3f9e8eeb09f2f6cb,
    0x3fa125d0432ea20e,
    0x3fa30838cdc2fbfd,
    0x3fa3faf7c663060e,
    0x3fa5e3966b7e9295,
    0x3fa7d070145f4fd7,
    0x3fa8c878eeb05074,
    0x3faabbcebd84fca0,
    0x3fabb7209d1e24e5,
    0x3fadb11ed766abf4,
    0x3faeafd05035bd3b,
    0x3fb0585283764178,
    0x3fb0d966cc6500fa,
    0x3fb1dd5460c8b16f,
    0x3fb2603072a25f82,
    0x3fb367ba3aaa1883,
    0x3fb3ec6ad5407868,
    0x3fb4f7aad9bbcbaf,
    0x3fb57e3d47c3af7b,
    0x3fb605735ee985f1,
    0x3fb715d0ce367afc,
    0x3fb79efb57b0f803,
    0x3fb828cfed29a215,
    0x3fb93e7de0fc3e80,
    0x3fb9ca5aa1729f45,
    0x3fba56e8325f5c87,
    0x3fbae4285509950b,
    0x3fbb721cd17157e3,
    0x3fbc902a19e65111,
    0x3fbd204698cb42bd,
    0x3fbdb11ed766abf4,
    0x3fbe42b4c16caaf3,
    0x3fbed50a4a26eafc,
    0x3fbffbfc2bbc7803,
    0x3fc0484e4942aa43,
    0x3fc093025a19976c,
    0x3fc0de1b56356b04,
    0x3fc1299a4fb3e306,
    0x3fc175805d1587c1,
    0x3fc1c1ce9955c0c6,
    0x3fc20e8624038fed,
    0x3fc25ba8215af7fc,
    0x3fc2a935ba5f1479,
    0x3fc2f7301cf4e87b,
    0x3fc345987bfeea91,
    0x3fc394700f7953fd,
    0x3fc3e3b8149739d4,
    0x3fc43371cde076c2,
    0x3fc4839e83506c87,
    0x3fc4d43f8275a483,
    0x3fc525561e9256ee,
    0x3fc576e3b0bde0a7,
    0x3fc5c8e998072fe2,
    0x3fc61b6939983048,
    0x3fc66e6400da3f77,
    0x3fc6c1db5f9bb336,
    0x3fc6c1db5f9bb336,
    0x3fc715d0ce367afc,
    0x3fc76a45cbb7e6ff,
    0x3fc7bf3bde099f30,
    0x3fc814b4921bd52b,
    0x3fc86ab17c10bc7f,
    0x3fc86ab17c10bc7f,
    0x3fc8c13437695532,
    0x3fc9183e673394fa,
    0x3fc96fd1b639fc09,
    0x3fc9c7efd734a2f9,
    0x3fca209a84fbcff8,
    0x3fca209a84fbcff8,
    0x3fca79d382bc21d9,
    0x3fcad39c9c2c6080,
    0x3fcb2df7a5c50299,
    0x3fcb2df7a5c50299,
    0x3fcb88e67cf97980,
    0x3fcbe46b087354bc,
    0x3fcc4087384f4f80,
    0x3fcc4087384f4f80,
    0x3fcc9d3d065c5b42,
    0x3fccfa8e765cbb72,
    0x3fccfa8e765cbb72,
    0x3fcd587d96494759,
    0x3fcdb70c7e96e7f3,
    0x3fcdb70c7e96e7f3,
    0x3fce163d527e68cf,
    0x3fce76124046b3f3,
    0x3fce76124046b3f3,
    0x3fced68d819191fc,
    0x3fcf37b15bab08d1,
    0x3fcf37b15bab08d1,
    0x3fcf99801fdb749d,
    0x3fcffbfc2bbc7803,
    0x3fcffbfc2bbc7803,
    0x3fd02f93f4c87101,
    0x3fd06182e84fd4ac,
    0x3fd06182e84fd4ac,
    0x3fd093cc32c90f84,
    0x3fd093cc32c90f84,
    0x3fd0c6711d6abd7a,
    0x3fd0f972f87ff3d6,
    0x3fd0f972f87ff3d6,
    0x3fd12cd31b9c99ff,
    0x3fd12cd31b9c99ff,
    0x3fd16092e5d3a9a6,
    0x3fd194b3bdef6b9e,
    0x3fd194b3bdef6b9e,
    0x3fd1c93712abc7ff,
    0x3fd1c93712abc7ff,
    0x3fd1fe1e5af2c141,
    0x3fd1fe1e5af2c141,
    0x3fd2336b161b3337,
    0x3fd2336b161b3337,
    0x3fd2691ecc29f042,
    0x3fd2691ecc29f042,
    0x3fd29f3b0e15584b,
    0x3fd29f3b0e15584b,
    0x3fd2d5c1760b86bb,
    0x3fd2d5c1760b86bb,
    0x3fd30cb3a7bb3625,
    0x0000000000000000,
];

#[inline]
pub(crate) fn core_log10f(x: f64) -> f64 {
    let x_u = x.to_bits();

    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

    let mut x_e: i32 = -(E_BIAS as i32);

    // log2(x) = log2(2^x_e * x_m)
    //         = x_e + log2(x_m)
    // Range reduction for log2(x_m):
    // For each x_m, we would like to find r such that:
    //   -2^-8 <= r * x_m - 1 < 2^-7
    let shifted = (x_u >> 45) as i32;
    let index = shifted & 0x7F;
    let r = f64::from_bits(LOG_RANGE_REDUCTION[index as usize]);

    // Add unbiased exponent. Add an extra 1 if the 8 leading fractional bits are
    // all 1's.
    x_e = x_e.wrapping_add(x_u.wrapping_add(1u64 << 45).wrapping_shr(52) as i32);
    let e_x = x_e as f64;

    const LOG_10_2_HI: f64 = f64::from_bits(0x3fd34413509f79ff);

    let log_r_dd = LOG10_D[index as usize];

    // hi is exact
    let hi = f_fmla(e_x, LOG_10_2_HI, f64::from_bits(log_r_dd));

    // Set m = 1.mantissa.
    let x_m = (x_u & 0x000F_FFFF_FFFF_FFFFu64) | 0x3FF0_0000_0000_0000u64;
    let m = f64::from_bits(x_m);

    let u;
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        u = f_fmla(r, m, -1.0); // exact
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        use crate::logs::LOG_CD;
        let c_m = x_m & 0x3FFF_E000_0000_0000u64;
        let c = f64::from_bits(c_m);
        u = f_fmla(r, m - c, f64::from_bits(LOG_CD[index as usize])); // exact
    }

    // Polynomial for log(1+x)/x generated in Sollya:
    // d = [-2^-8, 2^-7];
    // f_log10 = log10(1 + x)/x;
    // Q = fpminimax(f_log10, 6, [|D...|], d);
    // See ./notes/log10pf_core.sollya
    let p = f_polyeval6(
        u,
        f64::from_bits(0x3fdbcb7b1526e50e),
        f64::from_bits(0xbfcbcb7b1526e4e2),
        f64::from_bits(0x3fc287a763707f60),
        f64::from_bits(0xbfbbcb7b16f1858d),
        f64::from_bits(0x3fb63c613ca2ee0f),
        f64::from_bits(0xbfb28617a50029a9),
    );
    f_fmla(p, u, hi)
}

/// Computes log10(x+1)
///
/// Max ULP 0.5
#[inline]
pub fn f_log10p1f(x: f32) -> f32 {
    let z = x as f64;
    let ux = x.to_bits().wrapping_shl(1);
    if ux >= 0xffu32 << 24 || ux == 0 {
        // |x| == 0, |x| == inf, x == NaN
        if ux == 0 {
            return x;
        }
        if x.is_infinite() {
            return if x.is_sign_positive() {
                f32::INFINITY
            } else {
                f32::NAN
            };
        }
        return x + f32::NAN;
    }

    let ax = x.to_bits() & 0x7fff_ffffu32;

    // Use log10p1(x) = log10(1 + x) for |x| > 2^-6;
    if ax > 0x3c80_0000u32 {
        if x == -1. {
            return f32::NEG_INFINITY;
        }
        let x1p = z + 1.;
        if x1p <= 0. {
            if x1p == 0. {
                return f32::NEG_INFINITY;
            }
            return f32::NAN;
        }
        return core_log10f(x1p) as f32;
    }

    // log10p1 is expected to be used near zero:
    // Polynomial generated by Sollya:
    // d = [-2^-6; 2^-6];
    // f_log10pf = log10(1+x)/x;
    // Q = fpminimax(f_log10pf, 6, [|D...|], d);
    const C: [u64; 7] = [
        0x3fdbcb7b1526e50e,
        0xbfcbcb7b1526f138,
        0x3fc287a7636f8798,
        0xbfbbcb7b08fcfcad,
        0x3fb63c625d2472a4,
        0xbfb2892c9b620de1,
        0x3fafc7d3af2f4b6c,
    ];
    let p = f_estrin_polyeval7(
        z,
        f64::from_bits(C[0]),
        f64::from_bits(C[1]),
        f64::from_bits(C[2]),
        f64::from_bits(C[3]),
        f64::from_bits(C[4]),
        f64::from_bits(C[5]),
        f64::from_bits(C[6]),
    );
    (p * z) as f32
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_log10p1f() {
        assert_eq!(f_log10p1f(0.0), 0.0);
        assert_eq!(f_log10p1f(1.0), 0.30103);
        assert_eq!(f_log10p1f(-0.0432432), -0.019198442);
        assert_eq!(f_log10p1f(-0.009874634), -0.0043098135);
        assert_eq!(f_log10p1f(-0.000000054233), -2.3553092e-8);
        assert_eq!(f_log10p1f(1.2443), 0.35108092);
        assert_eq!(f_log10p1f(f32::INFINITY), f32::INFINITY);
        assert!(f_log10p1f(f32::NEG_INFINITY).is_nan());
        assert!(f_log10p1f(-1.0432432).is_nan());
    }
}