pxfm 0.1.29

Fast and accurate math
Documentation
/*
 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::bits::EXP_MASK;
use crate::common::{dyad_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::sin::range_reduction_small;
use crate::sincos_reduce::LargeArgumentReduction;
use crate::tangent::tanpi_table::TAN_K_PI_OVER_128;

#[inline]
pub(crate) fn tan_eval(u: DoubleDouble) -> (DoubleDouble, f64) {
    // Evaluate tan(y) = tan(x - k * (pi/128))
    // We use the degree-9 Taylor approximation:
    //   tan(y) ~ P(y) = y + y^3/3 + 2*y^5/15 + 17*y^7/315 + 62*y^9/2835
    // Then the error is bounded by:
    //   |tan(y) - P(y)| < 2^-6 * |y|^11 < 2^-6 * 2^-66 = 2^-72.
    // For y ~ u_hi + u_lo, fully expanding the polynomial and drop any terms
    // < ulp(u_hi^3) gives us:
    //   P(y) = y + y^3/3 + 2*y^5/15 + 17*y^7/315 + 62*y^9/2835 = ...
    // ~ u_hi + u_hi^3 * (1/3 + u_hi^2 * (2/15 + u_hi^2 * (17/315 +
    //                                                     + u_hi^2 * 62/2835))) +
    //        + u_lo (1 + u_hi^2 * (1 + u_hi^2 * 2/3))
    let u_hi_sq = u.hi * u.hi; // Error < ulp(u_hi^2) < 2^(-6 - 52) = 2^-58.
    // p1 ~ 17/315 + u_hi^2 62 / 2835.
    let p1 = f_fmla(
        u_hi_sq,
        f64::from_bits(0x3f9664f4882c10fa),
        f64::from_bits(0x3faba1ba1ba1ba1c),
    );
    // p2 ~ 1/3 + u_hi^2 2 / 15.
    let p2 = f_fmla(
        u_hi_sq,
        f64::from_bits(0x3fc1111111111111),
        f64::from_bits(0x3fd5555555555555),
    );
    // q1 ~ 1 + u_hi^2 * 2/3.
    let q1 = f_fmla(u_hi_sq, f64::from_bits(0x3fe5555555555555), 1.0);
    let u_hi_3 = u_hi_sq * u.hi;
    let u_hi_4 = u_hi_sq * u_hi_sq;
    // p3 ~ 1/3 + u_hi^2 * (2/15 + u_hi^2 * (17/315 + u_hi^2 * 62/2835))
    let p3 = f_fmla(u_hi_4, p1, p2);
    // q2 ~ 1 + u_hi^2 * (1 + u_hi^2 * 2/3)
    let q2 = f_fmla(u_hi_sq, q1, 1.0);
    let tan_lo = f_fmla(u_hi_3, p3, u.lo * q2);
    // Overall, |tan(y) - (u_hi + tan_lo)| < ulp(u_hi^3) <= 2^-71.
    // And the relative errors is:
    // |(tan(y) - (u_hi + tan_lo)) / tan(y) | <= 2*ulp(u_hi^2) < 2^-64
    let err = f_fmla(
        u_hi_3.abs(),
        f64::from_bits(0x3cc0000000000000),
        f64::from_bits(0x3990000000000000),
    );
    (DoubleDouble::from_exact_add(u.hi, tan_lo), err)
}

#[inline]
pub(crate) fn tan_eval_dd(x: DoubleDouble) -> DoubleDouble {
    // Generated by Sollya:
    // d = [0, pi/128];
    // f_tan = tan(x)/x;
    // Q = fpminimax(f_tan, [|0, 2, 4, 6, 8, 10, 12|], [|107...|], d);
    const C: [(u64, u64); 7] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x3c75555549d35a4b, 0x3fd5555555555555),
        (0x3c413dd6ea580288, 0x3fc1111111111111),
        (0x3c4e100b946f0c89, 0x3faba1ba1ba1b9fe),
        (0xbc3c180dfac2b8c3, 0x3f9664f488307189),
        (0x3c1fd691c256a14a, 0x3f8226e300c1988e),
        (0x3bec7b08c90fdfc0, 0x3f6d739baeacd204),
    ];
    let x2 = DoubleDouble::quick_mult(x, x);
    let mut p = DoubleDouble::quick_mul_add(
        x2,
        DoubleDouble::from_bit_pair(C[6]),
        DoubleDouble::from_bit_pair(C[5]),
    );
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[4]));
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
    p = DoubleDouble::quick_mul_add_f64(x2, p, f64::from_bits(0x3ff0000000000000));
    let r = DoubleDouble::quick_mult(p, x);
    DoubleDouble::from_exact_add(r.hi, r.lo)
}

#[cold]
fn tan_accurate(y: DoubleDouble, tan_k: DoubleDouble) -> f64 {
    // Computes tan(x) through identities
    // tan(a+b) = (tan(a) + tan(b)) / (1 - tan(a)tan(b)) = (tan(y) + tan(k*pi/128)) / (1 - tan(y)*tan(k*pi/128))
    let tan_y = tan_eval_dd(y);

    // num = tan(y) + tan(k*pi/64)
    let num_dd = DoubleDouble::full_dd_add(tan_y, tan_k);
    // den = 1 - tan(y)*tan(k*pi/64)
    let den_dd = DoubleDouble::mul_add_f64(tan_y, -tan_k, 1.);

    let tan_x = DoubleDouble::div(num_dd, den_dd);
    tan_x.to_f64()
}

#[cold]
fn tan_near_zero_hard(x: f64) -> DoubleDouble {
    // Generated by Sollya:
    // d = [0, 0.03125];
    // f_tan = tan(x)/x;
    // Q = fpminimax(f_tan, [|0, 2, 4, 6, 8, 10, 12|], [|1, 107...|], d);
    const C: [(u64, u64); 7] = [
        (0x0000000000000000, 0x3ff0000000000000),
        (0x3c7555548455da94, 0x3fd5555555555555),
        (0x3c4306a6358cc810, 0x3fc1111111111111),
        (0x3c2ca9cd025ea98c, 0x3faba1ba1ba1b952),
        (0x3c3cb012803c55a5, 0x3f9664f4883eb962),
        (0x3c28afc1adb8f202, 0x3f8226e276097461),
        (0xbbdf8f911392f348, 0x3f6d7791601277ea),
    ];
    let x2 = DoubleDouble::from_exact_mult(x, x);
    let mut p = DoubleDouble::quick_mul_add(
        x2,
        DoubleDouble::from_bit_pair(C[6]),
        DoubleDouble::from_bit_pair(C[5]),
    );
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[4]));
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
    p = DoubleDouble::quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
    p = DoubleDouble::quick_mul_add_f64(x2, p, f64::from_bits(0x3ff0000000000000));
    DoubleDouble::quick_mult_f64(p, x)
}

/// Tangent in double precision
///
/// ULP 0.5
pub fn f_tan(x: f64) -> f64 {
    let x_e = (x.to_bits() >> 52) & 0x7ff;
    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

    let y: DoubleDouble;
    let k;

    let mut argument_reduction = LargeArgumentReduction::default();

    if x_e < E_BIAS + 16 {
        // |x| < 2^16
        if x_e < E_BIAS - 5 {
            // |x| < 2^-5
            if x_e < E_BIAS - 27 {
                // |x| < 2^-27, |tan(x) - x| < ulp(x)/2.
                if x == 0.0 {
                    // Signed zeros.
                    return x + x;
                }
                return dyad_fmla(x, f64::from_bits(0x3c90000000000000), x);
            }
            let x2 = x * x;
            let x4 = x2 * x2;
            // Generated by Sollya:
            // d = [0, 0.03125];
            // f_tan = tan(x)/x;
            // Q = fpminimax(f_tan, [|0, 2, 4, 6, 8|], [|D...|], d);
            const C: [u64; 4] = [
                0x3fd555555555554b,
                0x3fc1111111142d5b,
                0x3faba1b9860ed843,
                0x3f966a802210f5bb,
            ];
            let p01 = f_fmla(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
            let p23 = f_fmla(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
            let w0 = f_fmla(x4, p23, p01);
            let w1 = x2 * w0 * x;
            let r = DoubleDouble::from_exact_add(x, w1);
            let err = f_fmla(
                x2,
                f64::from_bits(0x3cb0000000000000), // 2^-52
                f64::from_bits(0x3be0000000000000), // 2^-65
            );
            let ub = r.hi + (r.lo + err);
            let lb = r.hi + (r.lo - err);
            if ub == lb {
                return ub;
            }
            return tan_near_zero_hard(x).to_f64();
        } else {
            // Small range reduction.
            (y, k) = range_reduction_small(x);
        }
    } else {
        // Inf or NaN
        if x_e > 2 * E_BIAS {
            if x.is_nan() {
                return f64::NAN;
            }
            // tan(+-Inf) = NaN
            return x + f64::NAN;
        }

        // Large range reduction.
        (k, y) = argument_reduction.reduce(x);
    }

    let (tan_y, err) = tan_eval(y);

    // Computes tan(x) through identities.
    // tan(a+b) = (tan(a) + tan(b)) / (1 - tan(a)tan(b)) = (tan(y) + tan(k*pi/128)) / (1 - tan(y)*tan(k*pi/128))
    let tan_k = DoubleDouble::from_bit_pair(TAN_K_PI_OVER_128[(k & 255) as usize]);

    // num = tan(y) + tan(k*pi/64)
    let num_dd = DoubleDouble::add(tan_y, tan_k);
    // den = 1 - tan(y)*tan(k*pi/64)
    let den_dd = DoubleDouble::mul_add_f64(tan_y, -tan_k, 1.);

    let tan_x = DoubleDouble::div(num_dd, den_dd);

    // Simple error bound: |1 / den_dd| < 2^(1 + floor(-log2(den_dd)))).
    let den_inv = ((E_BIAS + 1) << (52 + 1)) - (den_dd.hi.to_bits() & EXP_MASK);
    // For tan_x = (num_dd + err) / (den_dd + err), the error is bounded by:
    //   | tan_x - num_dd / den_dd |  <= err * ( 1 + | tan_x * den_dd | ).
    let tan_err = err * f_fmla(f64::from_bits(den_inv), tan_x.hi.abs(), 1.0);

    let err_higher = tan_x.lo + tan_err;
    let err_lower = tan_x.lo - tan_err;

    let tan_upper = tan_x.hi + err_higher;
    let tan_lower = tan_x.hi + err_lower;

    // Ziv_s rounding test.
    if tan_upper == tan_lower {
        return tan_upper;
    }

    tan_accurate(y, tan_k)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tan_test() {
        assert_eq!(f_tan(0.0003242153424), 0.0003242153537600293);
        assert_eq!(f_tan(-0.3242153424), -0.3360742441422686);
        assert_eq!(f_tan(0.3242153424), 0.3360742441422686);
        assert_eq!(f_tan(-97301943219974110.), 0.000000000000000481917514979303);
        assert_eq!(f_tan(0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007291122019556397),
                   0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007291122019556397);
        assert_eq!(f_tan(0.0), 0.0);
        assert_eq!(f_tan(0.4324324324), 0.4615683710506729);
        assert_eq!(f_tan(1.0), 1.5574077246549023);
        assert_eq!(f_tan(-0.5), -0.5463024898437905);
        assert!(f_tan(f64::INFINITY).is_nan());
        assert!(f_tan(f64::NEG_INFINITY).is_nan());
        assert!(f_tan(f64::NAN).is_nan());
    }
}