pxfm 0.1.29

Fast and accurate math
Documentation
/*
 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::logs::simple_fast_log;
use crate::polyeval::{
    f_estrin_polyeval7, f_estrin_polyeval8, f_estrin_polyeval9, f_polyeval3, f_polyeval5,
    f_polyeval10, f_polyeval11,
};

#[inline]
pub(crate) fn erfinv_core(z: f64, ax: u32, sign: f32) -> f32 {
    if ax <= 0x3c1ba5e3u32 {
        // 0.0095
        // for small |x| using taylor series first 3 terms
        let z2 = z * z;
        // Generated by SageMath:
        // from mpmath import mp, erf
        //
        // mp.prec = 100
        //
        // def inverf_series(n_terms):
        //     from mpmath import taylor
        //     series_erf = taylor(mp.erfinv, 0, n_terms)
        //     return series_erf
        //
        // ser = inverf_series(10)
        // for i in range(1, len(ser), 2):
        //     k = ser[i]
        //     print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
        let p = f_polyeval3(
            z2,
            f64::from_bits(0x3fec5bf891b4ef6b),
            f64::from_bits(0x3fcdb29fb2fee5e4),
            f64::from_bits(0x3fc053c2c0ab91c5),
        ) * z;
        return f32::copysign(p as f32, sign);
    } else if ax <= 0x3d75c28fu32 {
        // 0.06
        // for |x| < 0.06 using taylor series first 5 terms
        let z2 = z * z;
        // Generated by SageMath:
        // from mpmath import mp, erf
        //
        // mp.prec = 100
        //
        // def inverf_series(n_terms):
        //     from mpmath import taylor
        //     series_erf = taylor(mp.erfinv, 0, n_terms)
        //     return series_erf
        //
        // ser = inverf_series(10)
        // for i in range(1, len(ser), 2):
        //     k = ser[i]
        //     print("f64::from_bits(" + double_to_hex(RealField(100)(k)) + "),")
        let p = f_polyeval5(
            z2,
            f64::from_bits(0x3fec5bf891b4ef6b),
            f64::from_bits(0x3fcdb29fb2fee5e4),
            f64::from_bits(0x3fc053c2c0ab91c5),
            f64::from_bits(0x3fb62847c47dda48),
            f64::from_bits(0x3fb0a13189c6ef7a),
        ) * z;
        return f32::copysign(p as f32, sign);
    }

    if ax <= 0x3f400000u32 {
        // |x| <= 0.75
        let z2 = z * z;

        // First step rational approximant is generated, but it's ill-conditioned, thus
        // we're using taylor expansion to create Newton form at the point.
        //
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=InverseErf[Sqrt[x]]/Sqrt[x]
        // {err0, approx}=MiniMaxApproximation[f[z],{z,{0.06,0.75},8,7},WorkingPrecision->70]
        // num=Numerator[approx][[1]];
        // den=Denominator[approx][[1]];
        // poly=num;
        // coeffs=CoefficientList[poly,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let r = z2 - 0.5625;
        // x0=SetPrecision[0.5625,75];
        // NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
        // coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,8}];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let p_num = f_estrin_polyeval9(
            r,
            f64::from_bits(0x3fa329348a73d9d4),
            f64::from_bits(0xbfd2cb089b644580),
            f64::from_bits(0x3fed229149f732d6),
            f64::from_bits(0xbff6a233d2028bff),
            f64::from_bits(0x3ff268adbfbb6023),
            f64::from_bits(0xbfddac401c7d70f4),
            f64::from_bits(0x3fb3b1bd759d5046),
            f64::from_bits(0xbf67aeb45bad547e),
            f64::from_bits(0xbf01ccc7434d381b),
        );
        // x0=SetPrecision[0.5625,75];
        // NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
        // coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,7}];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let p_den = f_estrin_polyeval8(
            r,
            f64::from_bits(0x3fa1aac2ee4b1413),
            f64::from_bits(0xbfd279342e281c99),
            f64::from_bits(0x3feef89a353c6d1b),
            f64::from_bits(0xbffa8f1b7cd6d0a7),
            f64::from_bits(0x3ff89ce6289819a1),
            f64::from_bits(0xbfe7db5282a4a2e1),
            f64::from_bits(0x3fc543f9a928db4a),
            f64::from_bits(0xbf888fd2990e88db),
        );
        let k = (p_num / p_den) * z;
        f32::copysign(k as f32, sign)
    } else if ax <= 0x3f580000u32 {
        // |x| <= 0.84375
        let z2 = z * z;

        // First step rational approximant is generated, but it's ill-conditioned, thus
        // we're using taylor expansion to create Newton form at the point.
        //
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=InverseErf[Sqrt[x]]/Sqrt[x]
        // {err0, approx}=MiniMaxApproximation[f[z],{z,{0.75,0.84375},6,6},WorkingPrecision->70]
        // num=Numerator[approx][[1]];
        // den=Denominator[approx][[1]];
        // poly=num;
        // coeffs=CoefficientList[poly,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let r = z2 - 0.84375;
        // x0=SetPrecision[0.84375,75];
        // NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
        // coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let p_num = f_polyeval10(
            r,
            f64::from_bits(0x3f116d07e62cbb74),
            f64::from_bits(0xbf5c38d390052412),
            f64::from_bits(0x3f92d6f96f84efe3),
            f64::from_bits(0xbfbac9189cae446b),
            f64::from_bits(0x3fd5dd124fb25677),
            f64::from_bits(0xbfe49845d46b80ab),
            f64::from_bits(0x3fe556c4913f60f8),
            f64::from_bits(0xbfd59e527704e33b),
            f64::from_bits(0x3fb07614a5e6c9f1),
            f64::from_bits(0xbf60ce54a2d8a789),
        );
        // x0=SetPrecision[0.84375,75];
        // NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
        // coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let p_den = f_polyeval10(
            r,
            f64::from_bits(0x3f09fbdd1c987d1e),
            f64::from_bits(0xbf5602ad17d419f4),
            f64::from_bits(0x3f8efe31ea5bc71d),
            f64::from_bits(0xbfb77e5f1bd26730),
            f64::from_bits(0x3fd4c3f03e4f5478),
            f64::from_bits(0xbfe5aa87dfc5e757),
            f64::from_bits(0x3fe9c6406f9abc0b),
            f64::from_bits(0xbfdff2f008b4db05),
            f64::from_bits(0x3fc1123be5319800),
            f64::from_bits(0xbf83be49c2d5cb9e),
        );
        let k = (p_num / p_den) * z;
        f32::copysign(k as f32, sign)
    } else if ax <= 0x3f700000u32 {
        // |x| <= 0.9375
        // First step rational approximant is generated, but it's ill-conditioned, thus
        // we're using taylor expansion to create Newton form at the point.
        //
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=InverseErf[Sqrt[x]]/Sqrt[x]
        // {err0, approx}=MiniMaxApproximation[f[z],{z,{0.84375,0.9375},10,9},WorkingPrecision->70]
        // num=Numerator[approx][[1]];
        // den=Denominator[approx][[1]];
        // coeffs=CoefficientList[poly,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let x2 = z * z;
        let r = x2 - 0.87890625;
        // x0=SetPrecision[0.87890625,75];
        // NumberForm[Series[num[x],{x,x0,50}], ExponentFunction->(Null&)]
        // coeffs=Table[SeriesCoefficient[num[x],{x,x0,k}],{k,0,9}];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let p_num = f_polyeval11(
            r,
            f64::from_bits(0x3ec70f1cbf8a758b),
            f64::from_bits(0xbf1c9dff87b698d0),
            f64::from_bits(0x3f5dfe7be00cc21c),
            f64::from_bits(0xbf913fd09c5a3682),
            f64::from_bits(0x3fb7ab0095693976),
            f64::from_bits(0xbfd3b3ca6a3c9919),
            f64::from_bits(0x3fe3533be6d1d8c8),
            f64::from_bits(0xbfe48208ef308ac7),
            f64::from_bits(0x3fd361a82dab69d1),
            f64::from_bits(0xbfa2401965a98195),
            f64::from_bits(0xbf54ba4d14ca54e3),
        );
        // x0=SetPrecision[0.87890625,75];
        // NumberForm[Series[den[x],{x,x0,50}], ExponentFunction->(Null&)]
        // coeffs=Table[SeriesCoefficient[den[x],{x,x0,k}],{k,0,9}];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let p_den = f_polyeval10(
            r,
            f64::from_bits(0x3ec0699f391e2327),
            f64::from_bits(0xbf151ec184941078),
            f64::from_bits(0x3f5717bb379a3c6e),
            f64::from_bits(0xbf8beed3755c3484),
            f64::from_bits(0x3fb46148b4a431ef),
            f64::from_bits(0xbfd25690b7bc76fa),
            f64::from_bits(0x3fe3f1b2f4ee0d9d),
            f64::from_bits(0xbfe888a7a4511975),
            f64::from_bits(0x3fdd84db18f2a240),
            f64::from_bits(0xbfb844807521be56),
        );
        let f = z * (p_num / p_den);
        f32::copysign(f as f32, sign)
    } else {
        // Rational approximation generated by Wolfram Mathematica:
        // for inverf(x) = sqrt(-log(1-x))*R(1/sqrt(-log(1-x)))
        //
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=InverseErf[Exp[-1/(x^2)]*(-1+Exp[1/(x^2)])]/(Sqrt[-Log[1-(Exp[-1/(x^2)]*(-1+Exp[1/(x^2)]))]] )
        // {err0, approx,err1}=MiniMaxApproximation[f[z],{z,{0.2,0.9999999},7,6},WorkingPrecision->90]
        // num=Numerator[approx];
        // den=Denominator[approx];
        // poly=num;
        // coeffs=CoefficientList[poly,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        // poly=den;
        // coeffs=CoefficientList[poly,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let zeta = -simple_fast_log(1. - z);
        let zeta_sqrt = zeta.sqrt();
        let rcp_zeta = (1. / zeta) * zeta_sqrt;
        let p_num = f_estrin_polyeval8(
            rcp_zeta,
            f64::from_bits(0x3ff00072876c578e),
            f64::from_bits(0x40314e00c10282da),
            f64::from_bits(0x404f4a1412af03f6),
            f64::from_bits(0x404c895cc0d9b1b3),
            f64::from_bits(0x404545794620bfaf),
            f64::from_bits(0x403264d21ea21354),
            f64::from_bits(0x3fc5a5141dd19237),
            f64::from_bits(0xbf8c2e49707c21ec),
        );
        let p_den = f_estrin_polyeval7(
            rcp_zeta,
            f64::from_bits(0x3ff0000000000000),
            f64::from_bits(0x403151312c313d77),
            f64::from_bits(0x405032345fa3d0cd),
            f64::from_bits(0x4053e0a81d4c5f09),
            f64::from_bits(0x4054fa20c5e0731c),
            f64::from_bits(0x404620d7f94d4804),
            f64::from_bits(0x4035d7400867b81f),
        );
        let r = zeta_sqrt * (p_num / p_den);
        f32::copysign(r as f32, sign)
    }
}

/// Inverse error function
///
/// Max ulp 0.5
pub fn f_erfinvf(x: f32) -> f32 {
    let ax = x.to_bits() & 0x7fff_ffff;

    if ax >= 0x3f800000u32 || ax <= 0x3400_0000u32 {
        // |x| >= 1, |x| == 0, |x| <= f32::EPSILON
        if ax == 0 {
            // |x| == 0
            return 0.;
        }

        if ax <= 0x3400_0000u32 {
            // |x| <= f32::EPSILON
            // inverf(x) ~ Sqrt[Pi]x/2+O[x]^3
            const SQRT_PI_OVER_2: f64 = f64::from_bits(0x3fec5bf891b4ef6b);
            return (x as f64 * SQRT_PI_OVER_2) as f32;
        }

        if ax == 0x3f800000u32 {
            // |x| == 1
            return if x.is_sign_negative() {
                f32::NEG_INFINITY
            } else {
                f32::INFINITY
            };
        }

        // |x| > 1
        return f32::NAN; // |x| == NaN, |x| == Inf, |x| > 1
    }

    let z = f32::from_bits(ax) as f64;
    erfinv_core(z, ax, x)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn f_test_inv_erff() {
        assert!(f_erfinvf(f32::NEG_INFINITY).is_nan());
        assert!(f_erfinvf(f32::INFINITY).is_nan());
        assert!(f_erfinvf(-1.1).is_nan());
        assert!(f_erfinvf(1.1).is_nan());
        assert_eq!(f_erfinvf(f32::EPSILON), 1.05646485e-7);
        assert_eq!(f_erfinvf(-1.), f32::NEG_INFINITY);
        assert_eq!(f_erfinvf(1.), f32::INFINITY);
        assert_eq!(f_erfinvf(0.002), 0.0017724558);
        assert_eq!(f_erfinvf(-0.002), -0.0017724558);
        assert_eq!(f_erfinvf(0.02), 0.017726395);
        assert_eq!(f_erfinvf(-0.02), -0.017726395);
        assert_eq!(f_erfinvf(0.05), 0.044340387);
        assert_eq!(f_erfinvf(-0.05), -0.044340387);
        assert_eq!(f_erfinvf(0.5), 0.47693628);
        assert_eq!(f_erfinvf(-0.5), -0.47693628);
        assert_eq!(f_erfinvf(0.76), 0.8308411);
        assert_eq!(f_erfinvf(-0.76), -0.8308411);
        assert_eq!(f_erfinvf(0.92), 1.2379221);
        assert_eq!(f_erfinvf(-0.92), -1.2379221);
        assert_eq!(f_erfinvf(0.97), 1.5344859);
        assert_eq!(f_erfinvf(-0.97), -1.5344859);
        assert_eq!(f_erfinvf(0.99), 1.8213866);
        assert_eq!(f_erfinvf(-0.99), -1.8213866);
        assert_eq!(f_erfinvf(0.7560265), 0.82385886);
    }
}