// pxfm 0.1.4 - Docs.rs

/*
 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::f_fmla;
use crate::dekker::Dekker;
use crate::log2p1_tables::{LOG2P1_EXACT, LOG2P1_INVERSE, LOG2P1_LOG_DD_INVERSE};

/// Returns a double-double approximation of `log(1+z) - z` for `|z| < 0.03125`.
///
/// The absolute error is bounded by 2^-67.14
/// (see analyze_p1a(-0.03125,0.03125) from log1p.sage).
#[inline]
pub(crate) fn log_p_1a(z: f64) -> Dekker {
    /* Degree-11 polynomial generated by Sollya approximating log(1+x) for
    |x| < 0.03125, with absolute error < 2^-73.441 and relative error
    < 2^-67.088 (see file Pabs_a.sollya).
    The polynomial is PA[0]*x + PA[1]*x^2 + ... + PA[10]*x^11.
    The evaluation below assumes that the degree-1 coefficient PA[0] is 1
    and the degree-2 coefficient PA[1] is -0.5, so neither is read. */
    const PA: [u64; 11] = [
        0x3ff0000000000000,
        0xbfe0000000000000,
        0x3fd5555555555555,
        0xbfcffffffffffe5f,
        0x3fc999999999aa82,
        0xbfc555555583a8c8,
        0x3fc2492491c359e6,
        0xbfbffffc728edeea,
        0x3fbc71c961f34980,
        0xbfb9a82ac77c05f4,
        0x3fb74b40dd1707d3,
    ];
    let coeff = |k: usize| f64::from_bits(PA[k]);

    // z^2 as hi + lo; below 2^-255 the exact product is skipped to avoid
    // spurious underflow and `Dekker::default()` is used instead.
    let sq: Dekker = if z.abs() >= f64::from_bits(0x3000000000000000) {
        Dekker::from_exact_mult(z, z)
    } else {
        Dekker::default()
    };

    // Pairs of neighbouring coefficients, then combined with z^2 and z^4.
    let q910 = f_fmla(coeff(10), z, coeff(9));
    let q78 = f_fmla(coeff(8), z, coeff(7));
    let q56 = f_fmla(coeff(6), z, coeff(5));
    let q34 = f_fmla(coeff(4), z, coeff(3));
    let upper = f_fmla(q910, sq.hi, q78);
    let lower = f_fmla(q56, sq.hi, q34);
    let z4 = sq.hi * sq.hi;

    // tail approximates PA[2]*z^2 + ... + PA[10]*z^10 (one factor z is applied below)
    let tail = f_fmla(f_fmla(upper, z4, lower), z, coeff(2)) * sq.hi;

    // -z^2/2 + tail*z, with the low part of z^2 folded into the low word
    let mut out = Dekker::from_exact_add(-0.5 * sq.hi, tail * z);
    out.lo += -0.5 * sq.lo;
    out
}

/// Returns a double-double approximation of `log(1+z) - z` for
/// `|z| < 0.00212097167968735`.
///
/// The absolute error is bounded by 2^-78.25
/// (see analyze_p1(-0.00212097167968735,0.00212097167968735) from the
/// accompanying file log1p.sage, which also yields |lo| < 2^-69.99).
#[inline]
fn p_1(z: f64) -> Dekker {
    // Coefficients of P[0]*z + P[1]*z^2 + ... + P[6]*z^7; P[0] = 1 and
    // P[1] = -0.5 are handled implicitly and never read.
    const P: [u64; 7] = [
        0x3ff0000000000000,
        0xbfe0000000000000,
        0x3fd5555555555550,
        0xbfcfffffffff572d,
        0x3fc999999a2d7868,
        0xbfc5555c0d31b08e,
        0x3fc2476b9058e396,
    ];
    let coeff = |k: usize| f64::from_bits(P[k]);

    let sq = Dekker::from_exact_mult(z, z);

    let q56 = f_fmla(coeff(6), z, coeff(5));
    let q34 = f_fmla(coeff(4), z, coeff(3));
    // approximates P[3]+P[4]*z+P[5]*z^2+P[6]*z^3
    let cubic = f_fmla(q56, sq.hi, q34);
    // approximates P[2]+P[3]*z+P[4]*z^2+P[5]*z^3+P[6]*z^4
    let quartic = f_fmla(cubic, z, coeff(2));
    // approximates P[2]*z^2+P[3]*z^3+P[4]*z^4+P[5]*z^5+P[6]*z^6
    let tail = quartic * sq.hi;

    // -z^2/2 + tail*z, then fold the low part of z^2 into the low word
    let mut out = Dekker::from_exact_add(-0.5 * sq.hi, tail * z);
    out.lo += -0.5 * sq.lo;
    out
}

/// Double-double approximation of `log(2^e * y)`, where `y` is the double
/// whose bit pattern is `v_u`.
///
/// `log2p1_fast` in this file passes `v_u` with the exponent field forced to
/// `0x3ff`, so that `1 <= y < 2`. Per the error analysis at the end of the
/// body, the absolute error on `hi + lo` is bounded by 2^-69.99.
#[inline]
pub(crate) fn log_fast(e: i32, v_u: u64) -> Dekker {
    const OFFSET: i32 = 362;
    /* log2_h is an integer multiple of 2^-42, so that e*log2_h is exact
    for -1074 <= e <= 1023 (e has at most 11 bits).
    |log(2) - (log2_h + log2_l)| < 2^-102.01 */
    const LOG2_H: f64 = f64::from_bits(0x3fe62e42fefa3800);
    const LOG2_L: f64 = f64::from_bits(0x3d2ef35793c76730);

    /* x = m/2^52 */
    let m: u64 = 0x10000000000000u64.wrapping_add(v_u & 0xfffffffffffff);
    /* if x > sqrt(2), we divide it by 2 to avoid cancellation;
    afterwards -1074 <= e <= 1024 */
    let (e, scale, shift) = if m >= 0x16a09e667f3bcd {
        (e.wrapping_add(1), 0.5, 44u32)
    } else {
        (e, 1.0, 43u32)
    };

    let i: i32 = (m >> shift) as i32;
    let idx = (i - OFFSET) as usize;
    let y = f64::from_bits(v_u) * scale;

    // r is the tabulated inverse, (l1, l2) the tabulated double-double for -log(r)
    let r = f64::from_bits(LOG2P1_INVERSE[idx]);
    let entry = LOG2P1_LOG_DD_INVERSE[idx];
    let l1 = f64::from_bits(entry.1);
    let l2 = f64::from_bits(entry.0);
    let z = f_fmla(r, y, -1.0); /* exact */

    /* evaluate P(z), for |z| < 0.00212097167968735 */
    let poly = p_1(z);

    /* Add e*log(2) to (h,l).
    Let hh = e * log2_h: hh is an integer multiple of 2^-42,
    with |hh| <= 1074*log2_h = 3274082061039582*2^-42.
    l1 is also an integer multiple of 2^-42, with
    |l1| <= 1524716581803*2^-42. Thus hh+l1 is an integer multiple of
    2^-42, with 2^42*|hh+l1| <= 3275606777621385 < 2^52, thus hh+l1 is
    exactly representable. */
    let ee = e as f64;
    let mut out = Dekker::from_exact_add(f_fmla(ee, LOG2_H, l1), z);
    /* here |hh+l1|+|z| <= 3275606777621385*2^-42 + 0.0022 < 745
    thus |h| < 745, and the additional error from the fast_two_sum() call is
    bounded by 2^-105*745 < 2^-95.4. */

    /* add ph + pl + l2 to l */
    out.lo = poly.hi + (out.lo + (l2 + poly.lo));
    /* here |ph| < 2.26e-6, |l| < ulp(h) = 2^-43, |l2| < 2^-43 and
    |pl| < 2^-69.99, thus |l2 + pl| < 2^-42 and |l + l2 + pl| < 2^-41.99,
    and the rounding error on l2 + pl is bounded by 2^-95 (l2 + pl cannot
    be > 2^-42), and that on l + (...) by 2^-94.
    Now |ph + (l + (l2 + pl))| < 2.26e-6 + 2^-41.99 < 2^-18.7, thus the
    rounding error on ph + ... is bounded by ulp(2^-18.7) = 2^-71, which
    yields a cumulated error bound of 2^-71 + 2^-95 + 2^-94 < 2^-70.99. */

    out.lo = f_fmla(ee, LOG2_L, out.lo);
    /* let l_in be the input value of l, and l_out the output value.
    We have |l_in| < 2^-18.7 (from above)
    and |e*log2_l| <= 1074*0x1.ef35793c7673p-45
    thus |l_out| < 2^-18.69 and err(l_out) <= ulp(2^-18.69) = 2^-71 */

    /* The absolute error on h + l is bounded by:
       2^-78.25 from the error in the Sollya polynomial plus the rounding errors
                in p_1
       2^-91.94 for the maximal difference |e*(log(2)-(log2_h + log2_l))|
                (|e| <= 1074 and |log(2)-(log2_h + log2_l)| < 2^-102.01)
       2^-97 for the maximal difference |l1 + l2 - (-log(r))|
       2^-95.4 from the fast_two_sum call
       2^-70.99 from the l = ph + (l + l2) instruction
       2^-71 from the last fma call.
       This gives an absolute error bounded by < 2^-69.99.
    */
    out
}

// High and low words of a double-double approximation of 1/log(2); they are
// always used together as `Dekker::new(INVLOG2L, INVLOG2H)`.
// |INVLOG2H + INVLOG2L - 1/log(2)| < 2^-110/log(2).
const INVLOG2H: f64 = f64::from_bits(0x3ff71547652b82fe);
const INVLOG2L: f64 = f64::from_bits(0x3c7777d0ffda0d24);

/// Accurate evaluation of `log2(1+x)` for small `|x|` using a degree-17
/// polynomial evaluated by Horner's rule, partly in double-double arithmetic.
///
/// Returns the polynomial value rounded to a single `f64`.
///
/// NOTE(review): the intended input interval is not stated in this file —
/// presumably `|x| < 2^-5` like the fast path; confirm against the reference
/// implementation.
fn log2p1_accurate_small(x: f64) -> f64 {
    // Coefficient layout:
    //   P_ACC[0..14]  degrees 1..=7 as (hi, lo) pairs: degree d -> [2d-2], [2d-1]
    //   P_ACC[14..24] degrees 8..=17 as single doubles: degree d -> [d+6]
    static P_ACC: [u64; 24] = [
        0x3ff71547652b82fe,
        0x3c7777d0ffda0d24,
        0xbfe71547652b82fe,
        0xbc6777d0ffd9ddb8,
        0x3fdec709dc3a03fd,
        0x3c7d27f055481523,
        0xbfd71547652b82fe,
        0xbc5777d1456a14c4,
        0x3fd2776c50ef9bfe,
        0x3c7e4b2a04f81513,
        0xbfcec709dc3a03fd,
        0xbc6d2072e751087a,
        0x3fca61762a7aded9,
        0x3c5f90f4895378ac,
        0xbfc71547652b8301,
        0x3fc484b13d7c02ae,
        0xbfc2776c50ef7591,
        0x3fc0c9a84993cabb,
        0xbfbec709de7b1612,
        0x3fbc68f56ba73fd1,
        0xbfba616c83da87e7,
        0x3fb89f3042097218,
        0xbfb72b376930a3fa,
        0x3fb5d0211d5ab530,
    ];

    /* for degree 11 or more, ulp(c[d]*x^d) < 2^-105.5*|log2p1(x)|
    where c[d] is the degree-d coefficient of Pacc, thus we can compute
    with a double only */

    // degrees 17 and 16
    let mut h = f_fmla(f64::from_bits(P_ACC[23]), x, f64::from_bits(P_ACC[22]));
    // degrees 15 down to 11
    for &c in P_ACC[17..22].iter().rev() {
        h = f_fmla(h, x, f64::from_bits(c));
    }

    // Degrees 10 down to 8: double-double accumulator, single-double
    // coefficients. All three of P_ACC[16], P_ACC[15], P_ACC[14] must be
    // consumed; skipping the degree-10 coefficient would shift every
    // higher-degree term down by one power of x.
    let mut l = 0.;
    for &c in P_ACC[14..17].iter().rev() {
        let prod = Dekker::f64_mult(x, Dekker::new(l, h));
        let sum = Dekker::from_exact_add(f64::from_bits(c), prod.hi);
        h = sum.hi;
        l = prod.lo + sum.lo;
    }

    // Degrees 7 down to 1: double-double accumulator and double-double
    // coefficients stored as consecutive (hi, lo) words.
    for pair in P_ACC[..14].chunks_exact(2).rev() {
        let prod = Dekker::f64_mult(x, Dekker::new(l, h));
        let sum = Dekker::from_exact_add(f64::from_bits(pair[0]), prod.hi);
        h = sum.hi;
        l = prod.lo + (sum.lo + f64::from_bits(pair[1]));
    }

    // final multiplication by x (the polynomial has no constant term)
    Dekker::f64_mult(x, Dekker::new(l, h)).to_f64()
}

/// Accurate evaluation for tiny inputs (|x| < 2^-900), where
/// `log2p1(x) ~ x/log(2)`.
///
/// One exceptional input magnitude is answered from a hard-coded value; every
/// other input is multiplied by the double-double `INVLOG2H + INVLOG2L` after
/// scaling by 2^106, and scaled back with a correction term.
fn log2p1_accurate_tiny(x: f64) -> f64 {
    const SCALE_UP: f64 = f64::from_bits(0x4690000000000000); // 2^106
    const SCALE_DOWN: f64 = f64::from_bits(0x3950000000000000); // 2^-106

    // exceptional values
    if x.abs() == f64::from_bits(0x0002c316a14459d8) {
        let tiny = f64::from_bits(0x1a70000000000000);
        let base = f64::from_bits(0x0003fc1ce8b1583f);
        return if x > 0. {
            f_fmla(tiny, tiny, base)
        } else {
            f_fmla(-tiny, tiny, -base)
        };
    }

    /* first scale x to avoid truncation of l in the underflow region */
    let scaled_x = x * SCALE_UP;
    let mut prod = Dekker::f64_mult(scaled_x, Dekker::new(INVLOG2L, INVLOG2H));

    // expected result, back at the original scale
    let res = prod.to_f64() * SCALE_DOWN;
    // what was lost when rounding to `res`, still at the 2^106 scale
    prod.lo += f_fmla(-res, SCALE_UP, prod.hi);
    // the correction to apply to res is l*2^-106
    /* For all rounding modes, we have underflow
    for |x| <= 0x1.62e42fefa39eep-1023 */
    f_fmla(prod.lo, SCALE_DOWN, res)
}

/// Fast path for `log2(1+x)`, adapted from cr_log1p_fast.
///
/// Given `x > -1`, returns `(p, err)` where `p.hi + p.lo` is a double-double
/// approximation of `log2(1+x)` and `err` is a bound on the maximal absolute
/// error, so that `|p.hi + p.lo - log2(1+x)| < err`.
///
/// `e` is the binary exponent of `x`: x = m*2^e with 1 <= m < 2 and
/// -1074 <= e <= 1023.
///
/// For `e <= -969` the value is obtained from the accurate routines and
/// returned as `(Dekker::new(0.0, result), 0.0)`.
#[inline]
fn log2p1_fast(x: f64, e: i32) -> (Dekker, f64) {
    if e <= -969 {
        /* then |x| might be as small as 2^-969, thus h=x/log(2) might in the
        binade [2^-969,2^-968), with ulp(h) = 2^-1021, and if |l| < ulp(h),
        then l.ulp() might be smaller than 2^-1074. We defer that case to
        the accurate path. */
        // NOTE(review): if `e` really is the exponent of `x`, then
        // |x| < 2^-968 here and the comparison against 2^-105 below looks
        // always true, so the `log2p1_accurate_small` arm appears
        // unreachable — confirm against callers.
        let result = if x.abs() < f64::from_bits(0x3960000000000000) {
            log2p1_accurate_tiny(x)
        } else {
            log2p1_accurate_small(x)
        };
        return (Dekker::new(0.0, result), 0.0);
    }

    if e < -5 {
        /* e <= -6 thus |x| < 2^-5 */
        let poly = log_p_1a(x);
        let mut sum = Dekker::from_exact_add(x, poly.hi);
        sum.lo += poly.lo;

        /* from analyze_x_plus_p1a(rel=true,Xmax=2^-5.) in the accompanying file
        log1p.sage, the relative error is bounded by 2^-61.14 with respect to
        h. We use the fact that we don't need the return value err to be
        positive, since we add/subtract it in the rounding test.
        We also get that the ratio |l/h| is bounded by 2^-50.96. */
        /* now we multiply h+l by 1/log(2) */
        let scaled = Dekker::quick_mult(sum, Dekker::new(INVLOG2L, INVLOG2H));

        /* the d_mul() call decomposes into:
         a_mul (h_out, l1, h, INVLOG2H)
         l2 = __builtin_fma (h, INVLOG2L, l1)
         l_out = __builtin_fma (l, INVLOG2H, l2)
         we have |l1| <= ulp(h_out)
         since |INVLOG2L/INVLOG2H| < 2^-55, then |h*INVLOG2L| <= 2^-55*|h_out|
         and since |x| < 2^53*ulp(x): |h*INVLOG2L| <= ulp(h_out)/4
         thus |l2| <= 5/4*ulp(h_out).
         Now since |l/h| < 2^-50.96, |l*INVLOG2H| < 2^-50.96*|h*INVLOG2H|
         < 2^-50.96*(1+2^-52)*|h_out| < 2^-50.95*|h_out| < 4.15*ulp(h_out),
         thus |l_out| < o(4.15*ulp(h_out)+5/4*ulp(h_out)) < 5.5*ulp(h_out).
         The rounding errors are bounded by ulp(l2)+ulp(l_out)
         <= ulp(5/4*ulp(h_out)) + ulp(5.5*ulp(h_out))
         <= 2^-52*(5/4*ulp(h_out)+5.5*ulp(h_out)) [using ulp(x) <= 2^-52*|x|]
         <= 2^-49.2*ulp(h_out)
         We also have to take into account the ignored term l*INVLOG2L:
         |l*INVLOG2L| < 2^-50.96*|h|*2^-55.97*|INVLOG2H|
                      < 2^-106.93*(1+2^-52)*|h_out|
                      < 2^-106.92*|h_out|
                      < 2^-51.92*ulp(h_out) [using |x| < 2^53*ulp(x)]
        and the approximation error in INVLOG2H+INVLOG2L:
        |INVLOG2H + INVLOG2L - 1/log(2)| < 2^-110/log(2)
        The total error of d_mul() is thus bounded by:
        (2^-49.2+2^-51.92)*ulp(h_out) < 2^-48.99*ulp(h_out) < 2^-100.99*|h_out|,
        using again ulp(x) <= 2^-52*|x|.

        The relative error is thus bounded by
        (1+2^-61.14)*(1+2^-100.99)*(1+2^-110)-1 < 2^-61.13 */
        /* 2^-61.13 < 0x1.d4p-62 */
        return (scaled, f64::from_bits(0x3c1d400000000000) * scaled.hi);
    }

    /* (xh,xl) <- 1+x */
    let one_plus_x = match x {
        v if v > 1.0 && v < f64::from_bits(0x7fefffffffffffff) => Dekker::from_exact_add(v, 1.0),
        // avoid spurious overflow for RNDU
        v if v > 1.0 => Dekker::new(1.0, v),
        v => Dekker::from_exact_add(1.0, v),
    };

    // split the high word into exponent and a significand renormalised to [1, 2)
    let hi_bits = one_plus_x.hi.to_bits();
    let exponent = ((hi_bits >> 52) as i32).wrapping_sub(0x3ff);
    let significand = (0x3ffu64 << 52) | (hi_bits & 0xfffffffffffff);
    let mut acc = log_fast(exponent, significand);

    /* log(xh+xl) = log(xh) + log(1+xl/xh) */
    let c = if one_plus_x.hi <= f64::from_bits(0x7fd0000000000000) || one_plus_x.lo.abs() >= 4.0
    {
        one_plus_x.lo / one_plus_x.hi
    } else {
        0.
    }; // avoid spurious underflow

    /* Since |xl| < ulp(xh), we have |xl| < 2^-52 |xh|,
    thus |c| < 2^-52, and since |log(1+x)-x| < x^2 for |x| < 0.5,
    we have |log(1+c)-c)| < c^2 < 2^-104. */
    acc.lo += c;
    /* Since |l_in| < 2^-18.69 (from the analysis of cr_log_fast, see file
    ../log/log.c), and |c| < 2^-52, we have |l| < 2^-18.68, thus the
    rounding error in l += c is bounded by ulp(2^-18.68) = 2^-71.
    The total absolute error is thus bounded by:
    0x1.b6p-69 + 2^-104 + 2^-71 < 2^-68.02. */

    /* now multiply h+l by 1/log(2) */
    let scaled = Dekker::quick_mult(acc, Dekker::new(INVLOG2L, INVLOG2H));
    /* the d_mul() call decomposes into:
       a_mul (h_out, l1, h, INVLOG2H)
       l2 = __builtin_fma (h, INVLOG2L, l1)
       l_out = __builtin_fma (l, INVLOG2H, l2)
       We have three errors:
       * the rounding error in l2 = __builtin_fma (h, INVLOG2L, l1)
       * the rounding error in l_out = __builtin_fma (l, INVLOG2H, l2)
       * the ignored term l * INVLOG2L
       We have |h| < 745 thus |h*INVLOG2H| < 1075 thus |h_out| <= 1075
       and |l1| <= ulp(h_out) <= 2^-42.
       Then |h*INVLOG2L+l1| <= 745*INVLOG2L+2^-42 < 2^-41.9
       thus |l2| < 2^-41.9*(1+2^-52) < 2^-41.8
       and the first rounding error is bounded by ulp(2^-41.8) = 2^-94.
       Now |l*INVLOG2H+l2| < 2^-18.68*INVLOG2H+2^-41.8 < 2^-18.1
       thus |l_out| < 2^-18.1*(1+2^-52) < 2^-18.09
       and the second rounding error is bounded by ulp(2^-18.09) = 2^-71.
       The ignored term is bounded by |l*INVLOG2L| < 2^-18.68*INVLOG2L < 2^-74.1.
       Thus, the absolute error from d_mul() is bounded by:
       2^-94 + 2^-71 + 2^-74.1 < 2^-70.84.

       Adding to the maximal absolute error of 2^-68.02 before d_mul(),
       we get 2^-68.02 + 2^-70.84 < 2^-67.82.
    */

    /* 2^-67.82 < 0x1.23p-68 */
    (scaled, f64::from_bits(0x3bb2300000000000))
}

/// Computes log2(1+x)
///
/// Max ULP 0.504
///
/// Special cases handled before the polynomial path:
/// - NaN and +Inf return `x + x`; -Inf returns NaN.
/// - `x < -1` returns NaN, `x == -1` returns -Inf.
/// - `+/-0` returns `x + x`.
/// - `x = 2^n - 1` with an exactly representable result returns that
///   integer directly.
#[inline]
pub fn f_log2p1(x: f64) -> f64 {
    let bits = x.to_bits();
    let e = (((bits >> 52) & 0x7ff) as i32).wrapping_sub(0x3ff);

    if e == 0x400 {
        /* NaN or +/-Inf: only -Inf falls into the x < -1 case */
        return if bits == 0xfffu64 << 52 {
            f64::NAN
        } else {
            x + x
        };
    }
    if x <= -1.0 {
        /* log2p1(x < -1) is NaN, log2p1(-1) is -Inf */
        return if x < -1.0 {
            f64::NAN
        } else {
            f64::NEG_INFINITY
        };
    }
    if x == 0. {
        return x + x; /* +/-0 */
    }

    /* now x > -1 */

    /* check x=2^n-1 for 0 <= n <= 53, where log2p1(x) is exact.
    T[e]=2^(e+1)-1 is the unique value of the form 2^n-1
    in the interval [2^e, 2^(e+1)). */
    if (0..=52).contains(&e) && x == f64::from_bits(LOG2P1_EXACT[e as usize]) {
        return (e + 1) as f64;
    }

    /* For x=2^k-1, -53 <= k <= -1, log2p1(x) = k is also exact. */
    if e == -1 && x < 0. {
        // -1 < x <= -1/2, so 1+x is exact
        let w = (1.0 + x).to_bits();
        if w & 0x000f_ffff_ffff_ffff == 0 {
            // mantissa bits are all zero: 1+x = 2^k
            return ((w >> 52) as i32).wrapping_sub(0x3ff) as f64;
        }
    }

    /* now x = m*2^e with 1 <= m < 2 and -1074 <= e <= 1023 */
    let (p, err) = log2p1_fast(x, e);
    p.hi + (p.lo - err)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_log2p1` on a small positive input, an input just above 1
    /// and a negative input close to -1.
    #[test]
    fn test_log2p1() {
        println!("{}", f_log2p1(0.00006669877554532304));

        // (input, expected result)
        let cases = [
            (0.00006669877554532304, 0.00009622278377734607),
            (1.00006669877554532304, 1.0000481121941047),
            (-0.90006669877554532304, -3.322890675865049),
        ];
        for (input, expected) in cases {
            assert_eq!(f_log2p1(input), expected);
        }
    }
}