pxfm 0.1.28

Fast and accurate math
Documentation
/*
 * // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::{f_fmla, min_normal_f64};
use crate::rounding::CpuRound;

// Generated by SageMath:
// print("[")
// for k in range(128):
//     k = RealField(150)(k) * RealField(150).pi() / RealField(150)(64)
//     print(double_to_hex(k.sin()) + ",")
// print("];")
pub(crate) static SIN_K_PI_OVER_64: [u64; 128] = [
    0x0000000000000000,
    0x3fa91f65f10dd814,
    0x3fb917a6bc29b42c,
    0x3fc2c8106e8e613a,
    0x3fc8f8b83c69a60b,
    0x3fcf19f97b215f1b,
    0x3fd294062ed59f06,
    0x3fd58f9a75ab1fdd,
    0x3fd87de2a6aea963,
    0x3fdb5d1009e15cc0,
    0x3fde2b5d3806f63b,
    0x3fe073879922ffee,
    0x3fe1c73b39ae68c8,
    0x3fe30ff7fce17035,
    0x3fe44cf325091dd6,
    0x3fe57d69348ceca0,
    0x3fe6a09e667f3bcd,
    0x3fe7b5df226aafaf,
    0x3fe8bc806b151741,
    0x3fe9b3e047f38741,
    0x3fea9b66290ea1a3,
    0x3feb728345196e3e,
    0x3fec38b2f180bdb1,
    0x3feced7af43cc773,
    0x3fed906bcf328d46,
    0x3fee212104f686e5,
    0x3fee9f4156c62dda,
    0x3fef0a7efb9230d7,
    0x3fef6297cff75cb0,
    0x3fefa7557f08a517,
    0x3fefd88da3d12526,
    0x3feff621e3796d7e,
    0x3ff0000000000000,
    0x3feff621e3796d7e,
    0x3fefd88da3d12526,
    0x3fefa7557f08a517,
    0x3fef6297cff75cb0,
    0x3fef0a7efb9230d7,
    0x3fee9f4156c62dda,
    0x3fee212104f686e5,
    0x3fed906bcf328d46,
    0x3feced7af43cc773,
    0x3fec38b2f180bdb1,
    0x3feb728345196e3e,
    0x3fea9b66290ea1a3,
    0x3fe9b3e047f38741,
    0x3fe8bc806b151741,
    0x3fe7b5df226aafaf,
    0x3fe6a09e667f3bcd,
    0x3fe57d69348ceca0,
    0x3fe44cf325091dd6,
    0x3fe30ff7fce17035,
    0x3fe1c73b39ae68c8,
    0x3fe073879922ffee,
    0x3fde2b5d3806f63b,
    0x3fdb5d1009e15cc0,
    0x3fd87de2a6aea963,
    0x3fd58f9a75ab1fdd,
    0x3fd294062ed59f06,
    0x3fcf19f97b215f1b,
    0x3fc8f8b83c69a60b,
    0x3fc2c8106e8e613a,
    0x3fb917a6bc29b42c,
    0x3fa91f65f10dd814,
    0xb69f77598338bfdf,
    0xbfa91f65f10dd814,
    0xbfb917a6bc29b42c,
    0xbfc2c8106e8e613a,
    0xbfc8f8b83c69a60b,
    0xbfcf19f97b215f1b,
    0xbfd294062ed59f06,
    0xbfd58f9a75ab1fdd,
    0xbfd87de2a6aea963,
    0xbfdb5d1009e15cc0,
    0xbfde2b5d3806f63b,
    0xbfe073879922ffee,
    0xbfe1c73b39ae68c8,
    0xbfe30ff7fce17035,
    0xbfe44cf325091dd6,
    0xbfe57d69348ceca0,
    0xbfe6a09e667f3bcd,
    0xbfe7b5df226aafaf,
    0xbfe8bc806b151741,
    0xbfe9b3e047f38741,
    0xbfea9b66290ea1a3,
    0xbfeb728345196e3e,
    0xbfec38b2f180bdb1,
    0xbfeced7af43cc773,
    0xbfed906bcf328d46,
    0xbfee212104f686e5,
    0xbfee9f4156c62dda,
    0xbfef0a7efb9230d7,
    0xbfef6297cff75cb0,
    0xbfefa7557f08a517,
    0xbfefd88da3d12526,
    0xbfeff621e3796d7e,
    0xbff0000000000000,
    0xbfeff621e3796d7e,
    0xbfefd88da3d12526,
    0xbfefa7557f08a517,
    0xbfef6297cff75cb0,
    0xbfef0a7efb9230d7,
    0xbfee9f4156c62dda,
    0xbfee212104f686e5,
    0xbfed906bcf328d46,
    0xbfeced7af43cc773,
    0xbfec38b2f180bdb1,
    0xbfeb728345196e3e,
    0xbfea9b66290ea1a3,
    0xbfe9b3e047f38741,
    0xbfe8bc806b151741,
    0xbfe7b5df226aafaf,
    0xbfe6a09e667f3bcd,
    0xbfe57d69348ceca0,
    0xbfe44cf325091dd6,
    0xbfe30ff7fce17035,
    0xbfe1c73b39ae68c8,
    0xbfe073879922ffee,
    0xbfde2b5d3806f63b,
    0xbfdb5d1009e15cc0,
    0xbfd87de2a6aea963,
    0xbfd58f9a75ab1fdd,
    0xbfd294062ed59f06,
    0xbfcf19f97b215f1b,
    0xbfc8f8b83c69a60b,
    0xbfc2c8106e8e613a,
    0xbfb917a6bc29b42c,
    0xbfa91f65f10dd814,
];

#[inline]
pub(crate) fn reduce_small_pi64(x: f64) -> (f64, i64) {
    // Generated in SageMath:
    // z = RealField(300)(64) / RealField(300).pi()
    // n = 32
    // x_hi = RealField(n)(z)  # convert to f64
    // x_mid = RealField(n)(z - RealField(300)(x_hi))
    // x_lo = RealField(n)(z - RealField(300)(x_hi) - RealField(300)(x_mid))
    // print(double_to_hex(x_hi), ",")
    // print(double_to_hex(x_mid), ",")
    // print(double_to_hex(x_lo), ",")
    const MPI_OVER_SIXTY_FOUR: [u64; 3] =
        [0xbfa921fb54400000, 0xbd80b4611a600000, 0xbb53198a2e037073];
    const SIXTY_EIGHT_OVER_PI: f64 = f64::from_bits(0x40345f306dc9c883);
    let prod_hi = x * SIXTY_EIGHT_OVER_PI;
    let kd = prod_hi.cpu_round();

    // Let y = x - k * (pi/64)
    // Then |y| < pi / 64
    // With extra rounding errors, we can bound |y| < 1.6 * 2^-7.
    let y_hi = f_fmla(kd, f64::from_bits(MPI_OVER_SIXTY_FOUR[0]), x); // Exact
    // |u.hi| < 1.6*2^-7
    let u_hi = f_fmla(kd, f64::from_bits(MPI_OVER_SIXTY_FOUR[1]), y_hi);
    (u_hi, unsafe {
        kd.to_int_unchecked::<i64>() // indeterminate values is always filtered out before this call, as well only lowest bits are used
    })
}

struct SinCosPi64 {
    v_sin: f64,
    v_cos: f64,
}

#[inline]
fn sincos_eval_pi64(x: f64) -> SinCosPi64 {
    let x2 = x * x;
    let x4 = x2 * x2;

    // Sin poly generated by Sollya:
    // d = [0, pi/64];
    // f_sin = sin(x)/x;
    // Q = fpminimax(f_sin, [|0, 2, 4, 6|], [|D...|], d);
    const S: [u64; 4] = [
        0x3ff0000000000000,
        0xbfc5555555555451,
        0x3f8111111072c563,
        0xbf2a01321c030841,
    ];

    let s0 = f_fmla(x2, f64::from_bits(S[1]), f64::from_bits(S[0]));
    let s1 = f_fmla(x2, f64::from_bits(S[3]), f64::from_bits(S[2]));
    let v_sin = f_fmla(x4, s1, s0) * x;

    // Cos poly generated by Sollya:
    // d = [0, pi/64];
    // f_cos = cos(x);
    // Q = fpminimax(f_cos, [|0, 2, 4, 6|], [|1, D...|], d);
    const C: [u64; 4] = [
        0x3ff0000000000000,
        0xbfdffffffffffb6c,
        0x3fa5555553f117c1,
        0xbf56c0f056672a03,
    ];
    let c0 = f_fmla(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
    let c1 = f_fmla(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
    let v_cos = f_fmla(x4, c1, c0);

    SinCosPi64 { v_sin, v_cos }
}

#[inline]
pub(crate) fn sin_small(z: f64) -> f64 {
    let x_e = (z.to_bits() >> 52) & 0x7ff;
    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

    if x_e < E_BIAS - 26 {
        return f_fmla(z, f64::from_bits(0xbc90000000000000), z);
    }

    let (angle_dd, k) = reduce_small_pi64(z);
    let sin_cos = sincos_eval_pi64(angle_dd);

    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 64) * pi/64).
    let sk = SIN_K_PI_OVER_64[((k as u64) & 127) as usize];
    let ck = SIN_K_PI_OVER_64[(((k as u64).wrapping_add(32)) & 127) as usize];

    let sin_k = f64::from_bits(sk);
    let cos_k = f64::from_bits(ck);
    f_fmla(sin_cos.v_cos, sin_k, sin_cos.v_sin * cos_k)
}

#[inline]
pub(crate) fn cos_small(z: f64) -> f64 {
    let x_e = (z.to_bits() >> 52) & 0x7ff;
    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;

    if x_e < E_BIAS - 27 {
        // Signed zeros.
        if z == 0.0 {
            return 1.0;
        }
        // For |x| < 2^-26, |sin(x) - x| < ulp(x)/2.
        return 1.0 - min_normal_f64();
    }

    let (angle_dd, k) = reduce_small_pi64(z);

    let sin_cos = sincos_eval_pi64(angle_dd);

    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 64) * pi/64).
    let sk = SIN_K_PI_OVER_64[((k as u64).wrapping_add(64) & 127) as usize];
    let ck = SIN_K_PI_OVER_64[(((k as u64).wrapping_add(32)) & 127) as usize];

    let sin_k = f64::from_bits(sk);
    let cos_k = f64::from_bits(ck);
    f_fmla(sin_cos.v_cos, cos_k, sin_cos.v_sin * sin_k)
}