// pxfm 0.1.29
//
// Fast and accurate math
// Documentation
/*
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::bessel::j0f::j1f_rsqrt;
use crate::common::f_fmla;
use crate::exponents::core_expf;
use crate::polyeval::{
    f_estrin_polyeval5, f_estrin_polyeval7, f_estrin_polyeval8, f_estrin_polyeval9, f_polyeval6,
};

/// Modified Bessel function of the first kind of order 0
///
/// Max ULP 0.5
///
/// Special values handled here: `±0` yields `1`, `±inf` yields `+inf`, NaN
/// yields NaN, and any `|x| >= 91.90077` yields `+inf`.
///
/// Every other input is routed by magnitude to one of the range-specific
/// helpers in this file. Apart from the tiny-argument shortcut, which squares
/// `x` directly, the sign bit is cleared before the helper is called.
pub fn f_i0f(x: f32) -> f32 {
    let raw = x.to_bits();
    // Shifting the sign bit out lets zero, infinity and NaN be classified
    // without caring about the sign of `x`.
    let sign_stripped = raw.wrapping_shl(1);

    if sign_stripped == 0 {
        // |x| == 0
        return 1.;
    }
    if sign_stripped >= 0xffu32 << 24 {
        // |x| == inf, |x| == NaN
        if x.is_infinite() {
            return f32::INFINITY;
        }
        return x + f32::NAN; // x == NaN
    }

    // Bit pattern of |x|.
    let magnitude = raw & 0x7fff_ffff;

    if magnitude >= 0x42b7cd32u32 {
        // |x| >= 91.90077
        return f32::INFINITY;
    }

    if magnitude >= 0x40f00000u32 {
        // 7.5 <= |x| < 91.90077
        return i0f_asympt(f32::from_bits(magnitude));
    }

    if magnitude <= 0x34000000u32 {
        // |x| <= f32::EPSILON (2^-23)
        // taylor series for I0(x) ~ 1 + x^2/4 + O(x^4)
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            target_arch = "aarch64"
        ))]
        {
            // Targets selected by this cfg use the f32 helper directly.
            use crate::common::f_fmlaf;
            return f_fmlaf(x, x * 0.25, 1.);
        }
        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            target_arch = "aarch64"
        )))]
        {
            // All other targets evaluate in f64 and narrow the result.
            let wide = x as f64;
            return f_fmla(wide, wide * 0.25, 1.) as f32;
        }
    }

    let abs_x = f32::from_bits(magnitude);

    if magnitude < 0x3f800000u32 {
        // f32::EPSILON < |x| < 1
        return i0f_small(abs_x) as f32;
    }
    if magnitude <= 0x40600000u32 {
        // 1 <= |x| <= 3.5
        return i0f_1_to_3p5(abs_x);
    }
    if magnitude <= 0x40c00000u32 {
        // 3.5 < |x| <= 6
        return i0f_3p5_to_6(abs_x);
    }
    // 6 < |x| < 7.5
    i0f_6_to_7p5(abs_x)
}

/**
Evaluates I0(x) for small arguments as

I0(x) = 1 + (x/2)^2 * P((x/2)^2)

where P is a degree-6 polynomial (7 coefficients).

This method is valid only on [0;1].

NOTE(review): the previous comment said the derivation is described at
`i0f_1_to_7p5`; no item with that name is defined in this file, so confirm
the intended reference.

Coefficients generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[0,x]-1)/(x/2)^2
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{0.0000001,1},6,0},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
pub(crate) fn i0f_small(x: f32) -> f64 {
    const QUARTER: f64 = 1. / 4.;
    let wide = x as f64;
    // z = (x/2)^2, the variable the polynomial is expressed in.
    let z = wide * wide * QUARTER;

    // Polynomial coefficients, lowest order first.
    let c0 = f64::from_bits(0x3ff000000000013a);
    let c1 = f64::from_bits(0x3fcffffffffc20b6);
    let c2 = f64::from_bits(0x3f9c71c71e6cd6a2);
    let c3 = f64::from_bits(0x3f5c71c65b0af15f);
    let c4 = f64::from_bits(0x3f1234796fceb081);
    let c5 = f64::from_bits(0x3ec0280faf31678c);
    let c6 = f64::from_bits(0x3e664fd494223545);

    let poly = f_estrin_polyeval7(z, c0, c1, c2, c3, c4, c5, c6);
    // 1 + z * P(z)
    f_fmla(poly, z, 1.)
}

/**
Computes I0 as a rational approximation:

I0 = 1 + (x/2)^2 * Pn((x/2)^2)/Qm((x/2)^2)

with a degree-5 numerator (6 coefficients) and a degree-4 denominator
(5 coefficients).

Valid only on interval [1;3.5].

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[0,x]-1)/(x/2)^2
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{1,3.5},5,4},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0f_1_to_3p5(x: f32) -> f32 {
    const QUARTER: f64 = 1. / 4.;
    let wide = x as f64;
    // z = (x/2)^2
    let z = wide * wide * QUARTER;

    // Pn(z), coefficients lowest order first.
    let numerator = f_polyeval6(
        z,
        f64::from_bits(0x3feffffffffffb69),
        f64::from_bits(0x3fc9ed7bd9dc97a7),
        f64::from_bits(0x3f915c14693c842e),
        f64::from_bits(0x3f45c6dc6a719e42),
        f64::from_bits(0x3eeacb79eba725f7),
        f64::from_bits(0x3e7b51e2acfc4355),
    );
    // Qm(z), coefficients lowest order first.
    let denominator = f_estrin_polyeval5(
        z,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfa84a10988f28eb),
        f64::from_bits(0x3f50f5599197a4be),
        f64::from_bits(0xbeea420cf9b13b1b),
        f64::from_bits(0x3e735d0c1eb6ed7d),
    );

    let ratio = numerator / denominator;
    // 1 + z * Pn(z)/Qm(z), narrowed to f32 at the very end.
    f_fmla(ratio, z, 1.) as f32
}

/**
Computes I0 as a rational approximation:

I0 = 1 + (x/2)^2 * Pn((x/2)^2)/Qm((x/2)^2)

with a degree-7 numerator (8 coefficients) and a degree-6 denominator
(7 coefficients).

NOTE(review): the original comment stated "Valid only on interval [6;7]",
matching the interval in the script below, while the function name and its
caller `f_i0f` use it for 6 < |x| < 7.5. Confirm which range the
coefficients were actually fitted for.

Generated by Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[0,x]-1)/(x/2)^2
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{6,7},7,6},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0f_6_to_7p5(x: f32) -> f32 {
    const QUARTER: f64 = 1. / 4.;
    let wide = x as f64;
    // z = (x/2)^2
    let z = wide * wide * QUARTER;

    // Pn(z), coefficients lowest order first.
    let numerator = f_estrin_polyeval8(
        z,
        f64::from_bits(0x3fefffffffffff7d),
        f64::from_bits(0x3fcb373b00569ccf),
        f64::from_bits(0x3f939069c3363b81),
        f64::from_bits(0x3f4c2095c90c66b3),
        f64::from_bits(0x3ef6713f648413db),
        f64::from_bits(0x3e947efa2f9936b4),
        f64::from_bits(0x3e2486a182f49420),
        f64::from_bits(0x3da213034a33de33),
    );
    // Qm(z), coefficients lowest order first.
    let denominator = f_estrin_polyeval7(
        z,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfa32313fea59d9e),
        f64::from_bits(0x3f460594c2ec6706),
        f64::from_bits(0xbedf725fb714690f),
        f64::from_bits(0x3e6d9cb39b19555c),
        f64::from_bits(0xbdf1900e3abcb7a6),
        f64::from_bits(0x3d64a21a2ea78ef6),
    );

    let ratio = numerator / denominator;
    // 1 + z * Pn(z)/Qm(z), narrowed to f32 at the very end.
    f_fmla(ratio, z, 1.) as f32
}

/**
Computes I0 as a rational approximation:

I0 = 1 + (x/2)^2 * Pn((x/2)^2)/Qm((x/2)^2)

with a degree-5 numerator and a degree-5 denominator (6 coefficients each).

Valid only on interval [3.5;6].

Generated in Wolfram Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=(BesselI[0,x]-1)/(x/2)^2
g[z_]:=f[2 Sqrt[z]]
{err, approx}=MiniMaxApproximation[g[z],{z,{3.5,6},5,5},WorkingPrecision->60]
poly=Numerator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
poly=Denominator[approx][[1]];
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0f_3p5_to_6(x: f32) -> f32 {
    const QUARTER: f64 = 1. / 4.;
    let wide = x as f64;
    // z = (x/2)^2
    let z = wide * wide * QUARTER;

    // Pn(z), coefficients lowest order first.
    let numerator = f_polyeval6(
        z,
        f64::from_bits(0x3feffffffffd9550),
        f64::from_bits(0x3fc97e18ee033fb4),
        f64::from_bits(0x3f90b3199079bce1),
        f64::from_bits(0x3f442c300a425372),
        f64::from_bits(0x3ee7831030ae18ca),
        f64::from_bits(0x3e76387d67354932),
    );
    // Qm(z), coefficients lowest order first.
    let denominator = f_polyeval6(
        z,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xbfaa079c484e406a),
        f64::from_bits(0x3f5452098f1556fb),
        f64::from_bits(0xbef33efb4a8128ac),
        f64::from_bits(0x3e865996e19448ca),
        f64::from_bits(0xbe09acbb64533c3e),
    );

    let ratio = numerator / denominator;
    // 1 + z * Pn(z)/Qm(z), narrowed to f32 at the very end.
    f_fmla(ratio, z, 1.) as f32
}

/**
Asymptotic expansion for I0, used for large arguments.

The rational function approximates the scaled quantity
sqrt(x) * exp(-x) * I0(x) = Pn(1/x)/Qm(1/x)
hence:
I0(x) = Pn(1/x)/Qm(1/x) * exp(x)/sqrt(x)

Both Pn and Qm have degree 8 (9 coefficients each) and are evaluated at 1/x.

Generated by Mathematica:
```text
<<FunctionApproximations`
ClearAll["Global`*"]
f[x_]:=Sqrt[x] Exp[-x] BesselI[0,x]
g[z_]:=f[1/z]
{err, approx}=MiniMaxApproximation[g[z],{z,{1/92.3,1/7.5},8,8},WorkingPrecision->70]
num=Numerator[approx][[1]];
den=Denominator[approx][[1]];
poly=num;
coeffs=CoefficientList[poly,z];
TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
```
**/
#[inline]
fn i0f_asympt(x: f32) -> f32 {
    let wide = x as f64;
    // The polynomials are expressed in 1/x.
    let inv_x = 1. / wide;

    // Pn(1/x), coefficients lowest order first.
    let numerator = f_estrin_polyeval9(
        inv_x,
        f64::from_bits(0x3fd9884533d44829),
        f64::from_bits(0xc02c940f40595581),
        f64::from_bits(0x406d41c495c2f762),
        f64::from_bits(0xc0a10ab76dda4520),
        f64::from_bits(0x40c825b1c2a48d07),
        f64::from_bits(0xc0e481d606d0b748),
        f64::from_bits(0x40f34759deefbd40),
        f64::from_bits(0xc0ef4b7fb49fa116),
        f64::from_bits(0x40c409a6f882ba00),
    );
    // Qm(1/x), coefficients lowest order first.
    let denominator = f_estrin_polyeval9(
        inv_x,
        f64::from_bits(0x3ff0000000000000),
        f64::from_bits(0xc041f8a9131ad229),
        f64::from_bits(0x408278e56af035bb),
        f64::from_bits(0xc0b5a34a108f3e35),
        f64::from_bits(0x40dee6f278ee24f5),
        f64::from_bits(0xc0fa95093b0c4f9f),
        f64::from_bits(0x4109982b87f75651),
        f64::from_bits(0xc10618cc3c91e2db),
        f64::from_bits(0x40e30895aec6fc4f),
    );
    let scaled = numerator / denominator;

    // presumably exp(x) evaluated in higher precision — confirm against `core_expf`
    let exp_x = core_expf(x);
    // presumably 1/sqrt(x) — confirm against `j1f_rsqrt`
    let inv_sqrt_x = j1f_rsqrt(wide);
    (scaled * inv_sqrt_x * exp_x) as f32
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_i0f` on NaN, infinities, zero, the overflow cutoff and a few
    /// reference points, including negative arguments that must give the same
    /// result as their positive counterparts.
    #[test]
    fn test_i0f() {
        // NaN cannot be compared with `assert_eq!`, so it is checked on its own.
        assert!(f_i0f(f32::NAN).is_nan());

        // (input, expected) pairs, in the order they were originally asserted.
        let cases: [(f32, f32); 12] = [
            (f32::NEG_INFINITY, f32::INFINITY),
            (f32::INFINITY, f32::INFINITY),
            (1., 1.2660658),
            (5., 27.239872),
            (16., 893446.25),
            (32., 5590908000000.0),
            (92.0, f32::INFINITY),
            (0., 1.0),
            (28., 109534600000.0),
            (-28., 109534600000.0),
            (-16., 893446.25),
            (-32., 5590908000000.0),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(f_i0f(input), expected);
        }
    }
}