pxfm 0.1.29

Fast and accurate math
Documentation
/*
 * // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::{f_fmla, is_integerf, is_odd_integerf};
use crate::logs::simple_fast_log;
use crate::polyeval::{f_estrin_polyeval4, f_polyeval5, f_polyeval7, f_polyeval8};
use crate::rounding::CpuFloor;
use crate::sin_cosf::fast_sinpif;

#[inline]
pub(crate) fn lgamma_coref(x: f32) -> (f64, i32) {
    let ax = f32::from_bits(x.to_bits() & 0x7fff_ffff);
    let dx = ax as f64;

    let is_positive = x.is_sign_positive();
    let mut sum_parity = 1f64;

    let mut f_res = 0f64;

    let mut signgam: i32 = 1i32;

    // For negative x, since (G is gamma function)
    // -x*G(-x)*G(x) = pi/sin(pi*x),
    // we have
    // G(x) = pi/(sin(pi*x)*(-x)*G(-x))
    // since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
    // Hence, for x<0, signgam = sign(sin(pi*x)) and
    // lgamma(x) = log(|Gamma(x)|)
    //  = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
    if !is_positive {
        let y1 = ax.cpu_floor();
        let fraction = ax - y1; // excess over the boundary

        let a = fast_sinpif(fraction);

        sum_parity = -1.;
        const LOG_PI: f64 = f64::from_bits(0x3ff250d048e7a1bd);
        f_res = LOG_PI - simple_fast_log(a * dx);
        // gamma(x) is negative in (-2n-1,-2n), thus when fx is odd
        let is_odd = (!is_odd_integerf(y1)) as i32;
        signgam = 1 - (is_odd << 1);
    }

    if ax <= 0.007 {
        // Generated by Wolfram Mathematica:
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=LogGamma[x+1]
        // {err0,approx}=MiniMaxApproximation[f[x],{x,{2^-32,0.07},3,3},WorkingPrecision->90]
        // num=Numerator[approx][[1]];
        // den=Denominator[approx][[1]];
        // poly=num;
        // coeffs=CoefficientList[poly,x];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        // poly=den;
        // coeffs=CoefficientList[poly,x];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let num = f_estrin_polyeval4(
            dx,
            f64::from_bits(0xbb49bbc47e595350),
            f64::from_bits(0xbfe2788cfc6fb2d3),
            f64::from_bits(0x3fc829299b97c525),
            f64::from_bits(0x3fd8e2e8aab1247c),
        );
        let den = f_estrin_polyeval4(
            dx,
            f64::from_bits(0x3ff0000000000000),
            f64::from_bits(0x3ff190e5bf5a8162),
            f64::from_bits(0x3fc927632366c502),
            f64::from_bits(0xbf8b4ea7bd1b808e),
        );
        f_res = f_fmla(num / den - simple_fast_log(dx), sum_parity, f_res);
    } else if ax < 1. {
        // Poly for loggamma(x + 1) - log(x) generated by Wolfram Mathematica
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=LogGamma[x+1]
        // {err0, approx}=MiniMaxApproximation[f[z],{z,{0.06,0.9999999},6,6},WorkingPrecision->90]
        // num=Numerator[approx][[1]];
        // den=Denominator[approx][[1]];
        // poly=den;
        // coeffs=CoefficientList[poly,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let num = f_polyeval7(
            dx,
            f64::from_bits(0xbd24e4cf78c8818a),
            f64::from_bits(0xbfe2788cfc6f64e6),
            f64::from_bits(0xbfe1ea632fe853b2),
            f64::from_bits(0x3fd988d2daad3806),
            f64::from_bits(0x3fe1f4870eaafdf4),
            f64::from_bits(0x3fc51e12d5b97330),
            f64::from_bits(0x3f889ebeb9f0c8ff),
        );
        let den = f_polyeval7(
            dx,
            f64::from_bits(0x3ff0000000000000),
            f64::from_bits(0x4003289872066f33),
            f64::from_bits(0x4000373d88c32fe5),
            f64::from_bits(0x3fe71e95bc874164),
            f64::from_bits(0x3fb9936a0e008be7),
            f64::from_bits(0x3f6cba20a1988a60),
            f64::from_bits(0xbef3ca884ba4237a),
        );
        f_res = f_fmla(num / den - simple_fast_log(dx), sum_parity, f_res);
    } else if ax <= 4. {
        // Poly for loggamma(x + 1) - log(x) generated by Wolfram Mathematica
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=LogGamma[x+1]
        // {err0, approx}=MiniMaxApproximation[f[z],{z,{1.0000001,4},7,7},WorkingPrecision->90]
        // num=Numerator[approx][[1]];
        // den=Denominator[approx][[1]];
        // poly=den;
        // coeffs=CoefficientList[poly,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let num = f_polyeval8(
            dx,
            f64::from_bits(0xbe630847302a205f),
            f64::from_bits(0xbfe2788c24a7deff),
            f64::from_bits(0xbfdd0f61b8171907),
            f64::from_bits(0x3fdab4ec27801e42),
            f64::from_bits(0x3fde11fd49427a3c),
            f64::from_bits(0x3fc0dbce39c660da),
            f64::from_bits(0x3f88e0b4cd3e97f7),
            f64::from_bits(0x3f328fcd9e9ee94e),
        );
        let den = f_polyeval8(
            dx,
            f64::from_bits(0x3ff0000000000000),
            f64::from_bits(0x4001b135a31bffae),
            f64::from_bits(0x3ffbbecb5e39f4ea),
            f64::from_bits(0x3fe2e4da095d10bd),
            f64::from_bits(0x3fb63cb7548d0d30),
            f64::from_bits(0x3f73a67613163399),
            f64::from_bits(0x3f10edd4e2b80e6f),
            f64::from_bits(0xbe77f4e4068f9d32),
        );
        f_res = f_fmla(num / den - simple_fast_log(dx), sum_parity, f_res);
    } else if ax <= 12. {
        // Poly for loggamma(x) generated by Wolfram Mathematica
        // <<FunctionApproximations`
        // ClearAll["Global`*"]
        // f[x_]:=LogGamma[x]
        // {err0, approx}=MiniMaxApproximation[f[z],{z,{4,12},7,7},WorkingPrecision->90]
        // num=Numerator[approx][[1]];
        // den=Denominator[approx][[1]];
        // poly=den;
        // coeffs=CoefficientList[poly,z];
        // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
        let num = f_polyeval8(
            dx,
            f64::from_bits(0x40086069fcc21690),
            f64::from_bits(0x4008c8ef8602e78d),
            f64::from_bits(0xc0186323149757ae),
            f64::from_bits(0xbff6a9ded42d3f31),
            f64::from_bits(0x3ff1adb5566cd807),
            f64::from_bits(0x3fcff023390dead6),
            f64::from_bits(0x3f8977a5454ec393),
            f64::from_bits(0x3f21361088f694dc),
        );
        let den = f_polyeval8(
            dx,
            f64::from_bits(0x3ff0000000000000),
            f64::from_bits(0x4016096b397dbe73),
            f64::from_bits(0x401478926593a395),
            f64::from_bits(0x3ff6a931f9e31511),
            f64::from_bits(0x3fc0fbe57bf263a4),
            f64::from_bits(0x3f7023f51ba39a98),
            f64::from_bits(0x3efabd75ebfbde40),
            f64::from_bits(0xbe4b2c9e2e4e7daa),
        );
        f_res = f_fmla(num / den, sum_parity, f_res);
    } else {
        // Stirling's approximation of Log(Gamma) and then Exp[Log[Gamma]]
        let y_recip = 1. / dx;
        let y_sqr = y_recip * y_recip;
        // Bernoulli coefficients generated by SageMath:
        // var('x')
        // def bernoulli_terms(x, N):
        //     S = 0
        //     for k in range(1, N+1):
        //         B = bernoulli(2*k)
        //         term = (B / (2*k*(2*k-1))) * x**((2*k-1))
        //         S += term
        //     return S
        //
        // terms = bernoulli_terms(x, 5)
        let bernoulli_poly = f_polyeval5(
            y_sqr,
            f64::from_bits(0x3fb5555555555555),
            f64::from_bits(0xbf66c16c16c16c17),
            f64::from_bits(0x3f4a01a01a01a01a),
            f64::from_bits(0xbf43813813813814),
            f64::from_bits(0x3f4b951e2b18ff23),
        );
        // Log[Gamma(x)] = x*log(x) - x + 1/2*Log(2*PI/x) + bernoulli_terms
        const LOG2_PI_OVER_2: f64 = f64::from_bits(0x3fed67f1c864beb5);
        let mut log_gamma = f_fmla(bernoulli_poly, y_recip, -dx) + LOG2_PI_OVER_2;
        log_gamma = f_fmla(simple_fast_log(dx), dx - 0.5, log_gamma);
        f_res = f_fmla(log_gamma, sum_parity, f_res);
    }

    (f_res, signgam)
}

/// Computes log(gamma(x))
///
/// Returns gamma value + sign.
///
/// ulp 0.5
pub fn f_lgamma_rf(x: f32) -> (f32, i32) {
    // filter out exceptional cases
    let xb = x.to_bits().wrapping_shl(1);
    if xb >= 0xffu32 << 24 || xb == 0 {
        if x.is_infinite() {
            return (f32::INFINITY, 1);
        }
        if x.is_nan() {
            return (f32::NAN, 1);
        }
        if xb == 0 {
            return (f32::INFINITY, 1 - 2 * (x.to_bits() >> 31) as i32);
        }
    }

    if is_integerf(x) {
        if x == 2. || x == 1. {
            return (0., 1);
        }
        if x.is_sign_negative() {
            let is_odd = (!is_odd_integerf(x)) as i32;
            return (f32::INFINITY, 1 - (is_odd << 1));
        }
    }
    let c_gamma = lgamma_coref(x);
    let fx = c_gamma.0 as f32;
    (fx, c_gamma.1)
}

#[cfg(test)]
mod tests {
    use crate::f_lgamma_rf;

    #[test]
    fn test_lgamma_rf() {
        assert_eq!(f_lgamma_rf(f32::NEG_INFINITY), (f32::INFINITY, 1));
        assert_eq!(f_lgamma_rf(f32::INFINITY), (f32::INFINITY, 1));
        assert_eq!(f_lgamma_rf(0.), (f32::INFINITY, 1));
        assert_eq!(f_lgamma_rf(-0.), (f32::INFINITY, -1));
        assert_eq!(f_lgamma_rf(5.), (3.1780539, 1));
        assert_eq!(f_lgamma_rf(-4.5), (-2.8130841, -1));
        assert_eq!(f_lgamma_rf(-2.0015738), (5.7596655, -1));
    }
}