// pxfm 0.1.29
//
// Fast and accurate math
// Documentation
/*
 * // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::f_fmla;
use crate::exponents::core_expdf;
use crate::polyeval::{f_estrin_polyeval8, f_polyeval6};

/// Evaluates `exp(x^2) * erfc(x)` in double precision.
///
/// The caller is expected to pass only `x > 1` (see the call site in
/// `f_erfcxf`, which routes `|x| <= 1` through its own approximant).
/// Two rational approximants are used: one fitted on `[1, 8]` in `x`,
/// and one fitted in `1/x` for the tail `x >= 8`.
#[inline]
fn core_erfcx(x: f32) -> f64 {
    let xd = x as f64;
    if x < 8. {
        // Rational approximant generated by Wolfram Mathematica:
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=Exp[x^2]Erfc[x]
        // {err0,approx,err1}=MiniMaxApproximation[f[z],{z,{1,8},7,7},WorkingPrecision->75,MaxIterations->100]
        // num=Numerator[approx];
        // den=Denominator[approx];
        // coeffs=CoefficientList[num,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        // coeffs=CoefficientList[den,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let numer = f_estrin_polyeval8(
            xd,
            f64::from_bits(0x3ff00000804c8f8f),
            f64::from_bits(0x3ffb7307ea8fdbeb),
            f64::from_bits(0x3ff7081ba7bc735c),
            f64::from_bits(0x3fe767338b33532a),
            f64::from_bits(0x3fce3c8288507fd6),
            f64::from_bits(0x3fa7ca2cb4ae697f),
            f64::from_bits(0x3f72b11b0dfb2348),
            f64::from_bits(0xbd9f64f0c15c479b),
        );
        let denom = f_estrin_polyeval8(
            xd,
            f64::from_bits(0x3ff0000000000000),
            f64::from_bits(0x4006c071e850132e),
            f64::from_bits(0x400d30326bc347ee),
            f64::from_bits(0x40060d8d56bada75),
            f64::from_bits(0x3ff56643fc4580eb),
            f64::from_bits(0x3fdb0e194e72a513),
            f64::from_bits(0x3fb5154759b61be3),
            f64::from_bits(0x3f8090b063cce524),
        );
        numer / denom
    } else {
        // Tail: erfcx(x) ~ 1/sqrt(pi) / x * R(1/x), with R a rational function
        // of the reciprocal argument.
        const ONE_OVER_SQRT_PI: f64 = f64::from_bits(0x3fe20dd750429b6d);
        let recip = 1. / xd;
        // Rational approximant generated by Wolfram Mathematica:
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=Exp[1/x^2]Erfc[1/x]/x*Sqrt[Pi]
        // {err0,approx}=MiniMaxApproximation[f[z],{z,{2^-12,1/8},5,5},WorkingPrecision->75,MaxIterations->100]
        // num=Numerator[approx][[1]];
        // den=Denominator[approx][[1]];
        // coeffs=CoefficientList[num,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        // coeffs=CoefficientList[den,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let numer = f_polyeval6(
            recip,
            f64::from_bits(0x3ff0000000000002),
            f64::from_bits(0xbfd09caf2bb541c3),
            f64::from_bits(0x40132238367ae454),
            f64::from_bits(0xc0060bc62c3711b1),
            f64::from_bits(0x40024a90d229158d),
            f64::from_bits(0xc0013665d8ff3813),
        );
        let denom = f_polyeval6(
            recip,
            f64::from_bits(0x3ff0000000000000),
            f64::from_bits(0xbfd09caf2bb5101d),
            f64::from_bits(0x4015223836772f2c),
            f64::from_bits(0xc00715911b5f5f5c),
            f64::from_bits(0x4010b66411ec4e1f),
            f64::from_bits(0xc00b325c767ed436),
        );
        let prefactor = recip * ONE_OVER_SQRT_PI;
        prefactor * (numer / denom)
    }
}

/// Scaled complementary error function, `erfcx(x) = exp(x^2) * erfc(x)`.
///
/// Special values handled explicitly:
/// - `|x| <= 1.19209290e-08` (including `±0`) returns `1`;
/// - `+inf` returns `0`, `-inf` returns `+inf`, NaN returns NaN;
/// - `x <= -9.382415` returns `+inf`.
///
/// Stated accuracy: ulp 0.5
pub fn f_erfcxf(x: f32) -> f32 {
    let raw_bits = x.to_bits();
    // Shifting the sign bit out lets a single unsigned comparison classify
    // the magnitude regardless of sign.
    let magnitude_x2 = raw_bits.wrapping_shl(1);

    if magnitude_x2 <= 0x6499_999au32 {
        // |x| == 0, |x| <= 1.19209290e-08
        return 1.;
    }
    if magnitude_x2 >= 0xffu32 << 24 {
        // Exponent field is all ones: |x| == inf or x is NaN.
        if !x.is_infinite() {
            return f32::NAN; // x == NaN
        }
        return if x.is_sign_negative() {
            f32::INFINITY
        } else {
            0.
        };
    }

    let abs_bits = raw_bits & 0x7fff_ffff;

    if x <= -9.382415 {
        // Far enough on the negative side that the result is reported as +inf.
        return f32::INFINITY;
    }

    if abs_bits <= 0x34000000u32 {
        // |x| <= 2^-23: only the linear term of the Taylor series at 0 is kept.
        // erfcx(x) ~ 1-(2 x)/Sqrt[\[Pi]]+x^2-(4 x^3)/(3 Sqrt[\[Pi]])+x^4/2-(8 x^5)/(15 Sqrt[\[Pi]])+O[x]^6
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            target_arch = "aarch64"
        ))]
        {
            // Targets selected by this cfg evaluate the step directly in f32.
            use crate::common::f_fmlaf;
            const NEG_TWO_OVER_SQRT_PI: f32 = f32::from_bits(0xbf906ebb);
            return f_fmlaf(x, NEG_TWO_OVER_SQRT_PI, 1.);
        }
        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            target_arch = "aarch64"
        )))]
        {
            // Otherwise evaluate in f64 and narrow the result once.
            use crate::common::f_fmla;
            const NEG_TWO_OVER_SQRT_PI: f64 = f64::from_bits(0xbff20dd750429b6d);
            return f_fmla(x as f64, NEG_TWO_OVER_SQRT_PI, 1.) as f32;
        }
    }

    if abs_bits <= 0x3f800000u32 {
        // |x| <= 1: one rational approximant covers both signs.
        let xd = x as f64;
        // Generated by Wolfram Mathematica:
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=Exp[x^2]Erfc[x]
        // {err0,approx}=MiniMaxApproximation[f[z],{z,{-1,1},7,7},WorkingPrecision->75,MaxIterations->100]
        // num=Numerator[approx][[1]];
        // den=Denominator[approx][[1]];
        // coeffs=CoefficientList[num,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        // coeffs=CoefficientList[den,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let numer = f_estrin_polyeval8(
            xd,
            f64::from_bits(0x3feffffffffffff8),
            f64::from_bits(0x3ff26c328bd2dc5f),
            f64::from_bits(0x3fe6f91b9fa5f58c),
            f64::from_bits(0x3fd09edf3fcf5ee1),
            f64::from_bits(0x3faddb3bcedbff91),
            f64::from_bits(0x3f7e43b5dd4b7587),
            f64::from_bits(0x3f3baab6b3e61d7b),
            f64::from_bits(0xbe83e7d629825321),
        );
        let denom = f_estrin_polyeval8(
            xd,
            f64::from_bits(0x3ff0000000000000),
            f64::from_bits(0x40023d04ee0abc28),
            f64::from_bits(0x400252b377263d61),
            f64::from_bits(0x3ff510af7f826479),
            f64::from_bits(0x3fddfc089c4731ed),
            f64::from_bits(0x3fba79b040e28b0a),
            f64::from_bits(0x3f8aea2f3579235a),
            f64::from_bits(0x3f485d2875b4f88c),
        );
        return (numer / denom) as f32;
    }

    // |x| > 1: evaluate on the magnitude, then reflect for negative inputs.
    let scaled_at_abs = core_erfcx(f32::from_bits(abs_bits));
    if x < 0. {
        // exp(x^2)erfc(-x) = 2*exp(x^2) - erfcx(|x|)
        let xd = x as f64;
        f_fmla(2., core_expdf(xd * xd), -scaled_at_abs) as f32
    } else {
        scaled_at_abs as f32
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks for `f_erfcxf` across its branches: the tiny-argument
    /// shortcut, the linear Taylor step, both rational ranges, the negative
    /// reflection, overflow to infinity, and the non-finite inputs.
    #[test]
    fn test_erfcx() {
        // (input, expected) pairs that must match exactly.
        let exact_cases: [(f32, f32); 11] = [
            (5.19209290e-09, 1.0),
            (1.19209290e-08, 1.0),
            (f32::EPSILON, 0.9999999),
            (12.1, 0.046469606),
            (7.1, 0.07869752),
            (1.1, 0.40173045),
            (-0.23, 1.3232007),
            (-1.4325, 15.234794),
            (-10., f32::INFINITY),
            (f32::INFINITY, 0.),
            (f32::NEG_INFINITY, f32::INFINITY),
        ];
        for &(input, expected) in exact_cases.iter() {
            assert_eq!(f_erfcxf(input), expected);
        }

        // NaN never compares equal, so it is checked on its own.
        assert!(f_erfcxf(f32::NAN).is_nan());
    }
}