use crate::bits::{biased_exponent_f64, get_exponent_f64, mantissa_f64};
use crate::common::{dd_fmla, dyad_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::logs::log2p1_dyadic_tables::{LOG2P1_F128_POLY, LOG2P1_INVERSE_2, LOG2P1_LOG_INV_2};
use crate::logs::log2p1_tables::{LOG2P1_EXACT, LOG2P1_INVERSE, LOG2P1_LOG_DD_INVERSE};
/// Computes log(1+z) - z as a double-double, for the small-|z| fast path.
///
/// The polynomial coefficients `PA` follow the Taylor series of log(1+z)
/// (1, -1/2, 1/3, ... with a slightly tuned tail). The linear term is
/// omitted — the caller adds `z` back exactly — and the dominant -z^2/2
/// term is combined with the cubic-and-up tail via an error-free add.
#[inline]
pub(crate) fn log_p_1a(z: f64) -> DoubleDouble {
    // z^2 as an exact double-double; when |z| < 2^-255 the low part of the
    // square is irrelevant, so skip the exact product entirely.
    let zz: DoubleDouble = if z.abs() >= f64::from_bits(0x3000000000000000) {
        DoubleDouble::from_exact_mult(z, z)
    } else {
        DoubleDouble::default()
    };
    let zz4 = zz.hi * zz.hi;
    const PA: [u64; 11] = [
        0x3ff0000000000000,
        0xbfe0000000000000,
        0x3fd5555555555555,
        0xbfcffffffffffe5f,
        0x3fc999999999aa82,
        0xbfc555555583a8c8,
        0x3fc2492491c359e6,
        0xbfbffffc728edeea,
        0x3fbc71c961f34980,
        0xbfb9a82ac77c05f4,
        0x3fb74b40dd1707d3,
    ];
    // Estrin-style evaluation of coefficients PA[3..=10] in plain doubles:
    // pair adjacent terms, then combine pairs with z^2 and z^4.
    let t_9_10 = dd_fmla(f64::from_bits(PA[10]), z, f64::from_bits(PA[9]));
    let t_7_8 = dd_fmla(f64::from_bits(PA[8]), z, f64::from_bits(PA[7]));
    let t_5_6 = dd_fmla(f64::from_bits(PA[6]), z, f64::from_bits(PA[5]));
    let t_3_4 = dd_fmla(f64::from_bits(PA[4]), z, f64::from_bits(PA[3]));
    let t_7_10 = dd_fmla(t_9_10, zz.hi, t_7_8);
    let t_3_6 = dd_fmla(t_5_6, zz.hi, t_3_4);
    let mut poly = dd_fmla(t_7_10, zz4, t_3_6);
    // Fold in the 1/3 coefficient and scale by z^2: poly ~= sum_{k>=2} PA[k] z^k / z.
    poly = dd_fmla(poly, z, f64::from_bits(PA[2]));
    poly *= zz.hi;
    // Error-free combination of the dominant -z^2/2 with the tail, then
    // account for the low part of z^2.
    let mut res = DoubleDouble::from_exact_add(-0.5 * zz.hi, poly * z);
    res.lo += -0.5 * zz.lo;
    res
}
/// Degree-7 approximation of log(1+z) - z for the table-driven fast path.
///
/// Same scheme as `log_p_1a` but with fewer (tuned) coefficients: the
/// linear term is left to the caller, and the dominant -z^2/2 term is
/// applied with an error-free add.
#[inline]
fn p_1(z: f64) -> DoubleDouble {
    const C: [u64; 7] = [
        0x3ff0000000000000,
        0xbfe0000000000000,
        0x3fd5555555555550,
        0xbfcfffffffff572d,
        0x3fc999999a2d7868,
        0xbfc5555c0d31b08e,
        0x3fc2476b9058e396,
    ];
    // z^2 exactly, as hi + lo.
    let zz = DoubleDouble::from_exact_mult(z, z);
    // Evaluate C[3..=6] in plain doubles (pairs combined with z^2).
    let t56 = dd_fmla(f64::from_bits(C[6]), z, f64::from_bits(C[5]));
    let t34 = dd_fmla(f64::from_bits(C[4]), z, f64::from_bits(C[3]));
    let mut acc = dd_fmla(t56, zz.hi, t34);
    // Fold in the 1/3-ish coefficient and scale by z^2.
    acc = dd_fmla(acc, z, f64::from_bits(C[2]));
    acc *= zz.hi;
    // Error-free combination of -z^2/2 with the higher-order tail.
    let mut out = DoubleDouble::from_exact_add(-0.5 * zz.hi, acc * z);
    out.lo += -0.5 * zz.lo;
    out
}
/// Fast natural-log core: given `x = 2^e * m`, with `v_u` holding the
/// significand bits re-biased to [1, 2) by the caller, returns log(x) as a
/// double-double. (Name notwithstanding, the constants used below are the
/// split of ln 2, so this is the natural log; the base-2 conversion happens
/// in the callers.)
#[inline]
pub(crate) fn log_fast(e: i32, v_u: u64) -> DoubleDouble {
    // Full 53-bit integer significand m in [2^52, 2^53).
    let m: u64 = 0x10000000000000u64.wrapping_add(v_u & 0xfffffffffffff);
    // c = 1 when m >= sqrt(2)*2^52: fold the significand into the lower
    // half-octave and compensate in the exponent.
    let c: i32 = if m >= 0x16a09e667f3bcd { 1 } else { 0 };
    let e = e.wrapping_add(c);
    static CY: [f64; 2] = [1.0, 0.5];
    static CM: [u32; 2] = [43, 44];
    // Table index from the top significand bits (shift depends on the fold).
    let i: i32 = (m >> CM[c as usize]) as i32;
    let y = f64::from_bits(v_u) * CY[c as usize];
    // OFFSET aligns the smallest possible index to 0 in the tables.
    const OFFSET: i32 = 362;
    // r ~ 1/y from the inverse table; (l1, l2) is the matching double-double
    // table value (stored (lo, hi) — note the swapped tuple fields below),
    // presumably log(1/r) — TODO confirm against the table generator.
    let r = f64::from_bits(LOG2P1_INVERSE[(i - OFFSET) as usize]);
    let log2_inv_dd = LOG2P1_LOG_DD_INVERSE[(i - OFFSET) as usize];
    let l1 = f64::from_bits(log2_inv_dd.1);
    let l2 = f64::from_bits(log2_inv_dd.0);
    // z = r*y - 1 is small, so log(y) = log(1/r) + log(1+z); p_1 supplies
    // log(1+z) - z.
    let z = dd_fmla(r, y, -1.0);
    let p = p_1(z);
    // ln 2 split into a truncated high part (trailing zero bits, so e*LOG2_H
    // is exact for moderate e) and its low correction.
    const LOG2_H: f64 = f64::from_bits(0x3fe62e42fefa3800);
    const LOG2_L: f64 = f64::from_bits(0x3d2ef35793c76730);
    let ee = e as f64;
    // Accumulate log(x) = e*ln2 + l1 + l2 + z + p(z) from high to low:
    // the exact add captures the roundoff of the dominant terms into vl.lo.
    let mut vl = DoubleDouble::from_exact_add(f_fmla(ee, LOG2_H, l1), z);
    vl.lo = p.hi + (vl.lo + (l2 + p.lo));
    vl.lo = dd_fmla(ee, LOG2_L, vl.lo);
    vl
}
/// 1/ln(2) = log2(e) ~ 1.4426950408889634 as a double-double.
/// Constructor argument order appears to be (lo, hi) — the second field,
/// 0x3ff71547652b82fe, is the high part. Used to convert natural logs to
/// base-2 logs.
const INV_LOG2_DD: DoubleDouble = DoubleDouble::new(
    f64::from_bits(0x3c7777d0ffda0d24),
    f64::from_bits(0x3ff71547652b82fe),
);
/// Accurate evaluation of log2(1+x) for small |x|, computed as x * P(x).
///
/// The leading coefficients of P match the Taylor series of log2(1+x)/x:
/// P_ACC[0..=1] is 1/ln2 as a double-double (same bits as `INV_LOG2_DD`),
/// P_ACC[2..=3] is -1/(2 ln2), and so on.
///
/// Table layout: entries 0..=13 are seven double-double coefficient pairs
/// (hi at even index, lo at odd index); entries 14..=23 are ten plain
/// double coefficients for the high-order terms, where double precision
/// suffices.
fn log2p1_accurate_small(x: f64) -> f64 {
    static P_ACC: [u64; 24] = [
        0x3ff71547652b82fe,
        0x3c7777d0ffda0d24,
        0xbfe71547652b82fe,
        0xbc6777d0ffd9ddb8,
        0x3fdec709dc3a03fd,
        0x3c7d27f055481523,
        0xbfd71547652b82fe,
        0xbc5777d1456a14c4,
        0x3fd2776c50ef9bfe,
        0x3c7e4b2a04f81513,
        0xbfcec709dc3a03fd,
        0xbc6d2072e751087a,
        0x3fca61762a7aded9,
        0x3c5f90f4895378ac,
        0xbfc71547652b8301,
        0x3fc484b13d7c02ae,
        0xbfc2776c50ef7591,
        0x3fc0c9a84993cabb,
        0xbfbec709de7b1612,
        0x3fbc68f56ba73fd1,
        0xbfba616c83da87e7,
        0x3fb89f3042097218,
        0xbfb72b376930a3fa,
        0x3fb5d0211d5ab530,
    ];
    // Highest-order terms (coefficients 17..=23): plain-double Horner.
    let mut h = dd_fmla(f64::from_bits(P_ACC[23]), x, f64::from_bits(P_ACC[22]));
    for i in (11..=15).rev() {
        h = dd_fmla(h, x, f64::from_bits(P_ACC[(i + 6) as usize]));
    }
    let mut l = 0.;
    // Coefficients 14..=16: plain-double coefficients folded into a
    // double-double (h, l) accumulator via error-free adds.
    for i in (8..=10).rev() {
        let mut p = DoubleDouble::quick_f64_mult(x, DoubleDouble::new(l, h));
        l = p.lo;
        p = DoubleDouble::from_exact_add(f64::from_bits(P_ACC[(i + 6) as usize]), p.hi);
        h = p.hi;
        l += p.lo;
    }
    // Coefficients 0..=13: double-double (hi, lo) pairs; the low half of
    // each coefficient is absorbed into the low accumulator.
    for i in (1..=7).rev() {
        let mut p = DoubleDouble::quick_f64_mult(x, DoubleDouble::new(l, h));
        l = p.lo;
        p = DoubleDouble::from_exact_add(f64::from_bits(P_ACC[(2 * i - 2) as usize]), p.hi);
        h = p.hi;
        l += p.lo + f64::from_bits(P_ACC[(2 * i - 1) as usize]);
    }
    // Final multiply by x and collapse the double-double to one f64.
    let pz = DoubleDouble::quick_f64_mult(x, DoubleDouble::new(l, h));
    pz.to_f64()
}
/// Accurate path for very tiny |x| (callers guard |x| < 2^-105), where
/// log2(1+x) ~= x/ln2 to within the final rounding.
#[cold]
fn log2p1_accurate_tiny(x: f64) -> f64 {
    // One exceptional (subnormal-range) argument is hard to round; return
    // its precomputed result, with the FMA arranged to force the correct
    // rounding of the last bit (sign handled by the mirrored constants).
    if x.abs() == f64::from_bits(0x0002c316a14459d8) {
        return if x > 0. {
            dd_fmla(
                f64::from_bits(0x1a70000000000000),
                f64::from_bits(0x1a70000000000000),
                f64::from_bits(0x0003fc1ce8b1583f),
            )
        } else {
            dd_fmla(
                f64::from_bits(0x9a70000000000000),
                f64::from_bits(0x1a70000000000000),
                f64::from_bits(0x8003fc1ce8b1583f),
            )
        };
    }
    // Scale x up by 2^106 (0x469... = 2^106) so the double-double product
    // x/ln2 is computed well inside the normal range, then scale the result
    // back down by 2^-106 (0x395...).
    let sx = x * f64::from_bits(0x4690000000000000);
    let mut zh = DoubleDouble::quick_f64_mult(sx, INV_LOG2_DD);
    // Recover the rounding residue of the scaled result and fold it back in
    // with a final fused multiply-add at the original scale.
    let res = zh.to_f64() * f64::from_bits(0x3950000000000000);
    zh.lo += dd_fmla(-res, f64::from_bits(0x4690000000000000), zh.hi);
    dyad_fmla(zh.lo, f64::from_bits(0x3950000000000000), res)
}
/// Fast path for log2(1+x). Returns (approximation, error bound); the
/// caller accepts the result only when both ends of the error interval
/// round to the same f64, otherwise it falls back to the accurate path.
#[inline]
fn log2p1_fast(x: f64, e: i32) -> (DoubleDouble, f64) {
    // |x| < 2^-5: 1+x would waste bits; evaluate a polynomial in x directly.
    if e < -5
    {
        // |x| < 2^-968: delegate straight to the accurate evaluators and
        // signal certainty with a zero error bound.
        // NOTE(review): with e <= -969 we have |x| < 2^-968 < 2^-105, so
        // the tiny branch always wins here; the small branch also serves
        // log2p1_accurate, which uses the same 2^-105 cutoff.
        if e <= -969 {
            let ax = x.abs();
            let result = if ax < f64::from_bits(0x3960000000000000) {
                log2p1_accurate_tiny(x)
            } else {
                log2p1_accurate_small(x)
            };
            return (DoubleDouble::new(0.0, result), 0.0);
        }
        // p ~ log(1+x) - x; add the leading x term back with an exact add.
        let mut p = log_p_1a(x);
        let p_lo = p.lo;
        p = DoubleDouble::from_exact_add(x, p.hi);
        p.lo += p_lo;
        // Convert the natural log to base 2.
        p = DoubleDouble::quick_mult(p, INV_LOG2_DD);
        // Relative error bound (~2^-61.8) scaled by the result magnitude.
        return (p, f64::from_bits(0x3c1d400000000000) * p.hi);
    }
    // Generic path: 1+x exactly as a double-double, natural log of the high
    // part via the table-driven core, then a correction for the low part.
    let zx = DoubleDouble::from_full_exact_add(1.0, x);
    let mut v_u = zx.hi.to_bits();
    let e = ((v_u >> 52) as i32).wrapping_sub(0x3ff);
    // Force the exponent field to 0x3ff so log_fast sees a value in [1, 2).
    v_u = (0x3ffu64 << 52) | (v_u & 0xfffffffffffff);
    let mut p = log_fast(e, v_u);
    // log(hi + lo) ~= log(hi) + lo/hi. Skip the quotient when hi is huge and
    // lo comparatively tiny — presumably because it could not affect the
    // result (and might underflow); TODO confirm the exact rationale.
    let c = if zx.hi <= f64::from_bits(0x7fd0000000000000) || zx.lo.abs() >= 4.0 {
        zx.lo / zx.hi
    } else {
        0.
    };
    p.lo += c;
    p = DoubleDouble::quick_mult(p, INV_LOG2_DD);
    // Fixed absolute error bound (~2^-67.9) for this path.
    (p, f64::from_bits(0x3bb2300000000000))
}
/// Horner evaluation of the 128-bit log(1+x) series tail: returns
/// x * (c0 + c1*x + ... + c12*x^12) with coefficients from
/// `LOG2P1_F128_POLY`.
fn log_dyadic_taylor_poly(x: DyadicFloat128) -> DyadicFloat128 {
    // Start from the highest-order coefficient and fold the rest in
    // descending order.
    let mut acc = LOG2P1_F128_POLY[12];
    for coeff in LOG2P1_F128_POLY[..12].iter().rev() {
        acc = x * acc + *coeff;
    }
    acc * x
}
/// 128-bit accurate log core. `d` is the value in 128-bit dyadic form; `x`
/// is the same value as an f64, used only to recover exponent/mantissa for
/// the argument reduction.
///
/// NOTE(review): despite the name, this returns the NATURAL log of `d`;
/// the conversion to base 2 (multiply by 1/ln2) is done by the caller
/// (see `log2p1_accurate`).
pub(crate) fn log2_dyadic(d: DyadicFloat128, x: f64) -> DyadicFloat128 {
    let biased_exp = biased_exponent_f64(x);
    let e = get_exponent_f64(x);
    let base_mant = mantissa_f64(x);
    // Add the implicit leading bit for normal numbers (biased_exp != 0).
    let mant = base_mant + if biased_exp != 0 { 1u64 << 52 } else { 0 };
    // Normalize subnormals: shift the mantissa to the top of the word and
    // move the shift into the exponent (lead == 11 for normal inputs).
    let lead = mant.leading_zeros();
    let kk = e - (if lead > 11 { lead - 12 } else { 0 }) as i64;
    let mut fe: i16 = kk as i16;
    let adjusted_mant = mant << lead;
    // Top 9 bits of the normalized mantissa index the inverse/log tables.
    let mut i: i16 = (adjusted_mant >> 55) as i16;
    // Fold mantissas above sqrt(2) (0xb504... ~ sqrt(2)*2^63) into the lower
    // half-octave, compensating in the exponent.
    if adjusted_mant > 0xb504f333f9de6484 {
        fe = fe.wrapping_add(1);
        i >>= 1;
    }
    // Strip the exponent: x now holds the reduced significand m near 1.
    let mut x = d;
    x.exponent = x.exponent.wrapping_sub(fe);
    // z = m * (1/m_i) - 1 is small; log(m) = log(1/m_i)-style table term
    // plus log(1+z) from the Taylor polynomial.
    let inverse_2 = LOG2P1_INVERSE_2[(i - 128) as usize];
    let mut z = x * inverse_2;
    const F128_MINUS_ONE: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Neg,
        exponent: -127,
        mantissa: 0x8000_0000_0000_0000_0000_0000_0000_0000_u128,
    };
    z = z + F128_MINUS_ONE;
    // ln(2) to 128-bit precision (mantissa 0xb17217f7... * 2^-128 ~ 0.6931).
    const LOG2: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -128,
        mantissa: 0xb172_17f7_d1cf_79ab_c9e3_b398_03f2_f6af_u128,
    };
    let r = LOG2.mul_int64(fe as i64);
    // log(d) = fe*ln2 + table term + log(1+z), summed smallest-first.
    let mut p = log_dyadic_taylor_poly(z);
    p = LOG2P1_LOG_INV_2[(i - 128) as usize] + p;
    p + r
}
/// Accurate (128-bit) fallback for log2(1+x), used when the fast path's
/// error interval straddles a rounding boundary.
#[cold]
fn log2p1_accurate(x: f64) -> f64 {
    let ax = x.abs();
    // |x| < 2^-5: dedicated small/tiny polynomial paths.
    if ax < f64::from_bits(0x3fa0000000000000) {
        return if ax < f64::from_bits(0x3960000000000000) {
            // |x| < 2^-105
            log2p1_accurate_tiny(x)
        } else {
            log2p1_accurate_small(x)
        };
    }
    // Compute 1+x exactly as a double-double, larger magnitude first.
    let dx = if x > 1.0 {
        DoubleDouble::from_exact_add(x, 1.0)
    } else {
        DoubleDouble::from_exact_add(1.0, x)
    };
    let mut t: u64 = x.to_bits();
    if dx.lo == 0. {
        // 1+x is exactly representable; if it is a power of two (all
        // mantissa bits zero) the result is exactly the integer exponent.
        t = dx.hi.to_bits();
        if (t.wrapping_shl(12)) == 0 {
            let e = ((t >> 52) as i32).wrapping_sub(0x3ff);
            return e as f64;
        }
    }
    if (t.wrapping_shl(12)) == 0 {
        // x itself is a power of two, x = 2^e with e >= 49:
        // log2(1+2^e) = e + log2(1+2^-e); the added 2^-48 presumably stands
        // in for that tiny positive term so the rounding goes the right way
        // — TODO confirm against the rounding analysis.
        let e: i32 = ((t >> 52) as i32).wrapping_sub(0x3ff);
        if e >= 49 {
            return e as f64 + f64::from_bits(0x3cf0000000000000); }
    }
    // 128-bit natural log of the high part (log2_dyadic returns ln despite
    // its name).
    let x_d = DyadicFloat128::new_from_f64(dx.hi);
    let mut y = log2_dyadic(x_d, dx.hi);
    // Low-part correction: ln(1 + lo/hi) ~= c - c^2/2 with c = lo/hi.
    let mut c = DyadicFloat128::from_div_f64(dx.lo, dx.hi);
    let mut bx = c * c;
    bx.exponent -= 1; // c^2 / 2
    bx.sign = DyadicSign::Neg; // -c^2/2 (c^2 is non-negative)
    c = c + bx;
    y = y + c;
    // Convert to base 2: LOG2_INV * 2^-12 = 1/ln2; the compensating
    // exponent shift is applied after the product.
    const LOG2_INV: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -115,
        mantissa: 0xb8aa_3b29_5c17_f0bb_be87_fed0_691d_3e89_u128,
    };
    y = y * LOG2_INV;
    y.exponent -= 12;
    y.fast_as_f64()
}
/// Computes log2(1+x), aiming for correct rounding.
///
/// Special operands (NaN, infinities, zero, x <= -1) and exactly-integer
/// results are handled up front; then the fast double-double path runs,
/// and the 128-bit accurate path is taken only when the fast result's
/// error interval straddles a rounding boundary.
pub fn f_log2p1(x: f64) -> f64 {
    let bits = x.to_bits();
    let e = (((bits >> 52) & 0x7ff) as i32).wrapping_sub(0x3ff);
    if e == 0x400 || x == 0. || x <= -1.0 {
        const MINUS_INF_BITS: u64 = 0xfffu64 << 52;
        // NaN or +Inf: x + x propagates the NaN payload / returns +Inf.
        if e == 0x400 && x.to_bits() != MINUS_INF_BITS {
            return x + x;
        }
        // Domain edge: log2(0) = -Inf at x == -1; NaN below the domain
        // (this branch also catches x == -Inf).
        if x <= -1.0
        {
            return if x < -1.0 {
                f64::NAN
            } else {
                f64::NEG_INFINITY
            };
        }
        // x is +/-0: log2(1) = 0; x + x keeps the sign of zero.
        return x + x;
    }
    // Table of arguments for which log2(1+x) is exactly the integer e+1.
    if (0..=52).contains(&e) {
        if x == f64::from_bits(LOG2P1_EXACT[e as usize]) {
            return (e + 1) as f64;
        }
    }
    // For x in (-1, -0.5], 1+x may be an exact power of two, giving an
    // exact negative integer result.
    if e == -1 && x < 0. {
        let w = (1.0 + x).to_bits();
        if w.wrapping_shl(12) == 0 {
            let k: i32 = ((w >> 52) as i32).wrapping_sub(0x3ff);
            return k as f64;
        }
    }
    // Fast path: accept when both ends of the error interval round to the
    // same double, otherwise fall back to the accurate path.
    let (p, err) = log2p1_fast(x, e);
    let lo_end = p.hi + (p.lo - err);
    let hi_end = p.hi + (p.lo + err);
    if lo_end == hi_end {
        return lo_end;
    }
    log2p1_accurate(x)
}
#[cfg(test)]
mod tests {
    use super::*;
    // Spot checks against precomputed correctly-rounded references: a
    // subnormal-range input, a small positive input, an input just above 1,
    // and a negative input close to -1.
    #[test]
    fn test_log2p1() {
        assert_eq!(f_log2p1(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008344095884546873),
        0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012037985753337781);
        assert_eq!(f_log2p1(0.00006669877554532304), 0.00009622278377734607);
        assert_eq!(f_log2p1(1.00006669877554532304), 1.0000481121941047);
        assert_eq!(f_log2p1(-0.90006669877554532304), -3.322890675865049);
    }
}