pxfm 0.1.29

Fast and accurate math
Documentation
/*
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::bessel::j0f::{j0f_asympt_alpha, j0f_asympt_beta, j1f_rsqrt};
use crate::bessel::trigo_bessel::sin_small;
use crate::bessel::y0f_coeffs::{Y0_ZEROS, Y0_ZEROS_VALUES, Y0F_COEFFS};
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::logs::fast_logf;
use crate::polyeval::{f_polyeval10, f_polyeval18};
use crate::rounding::CpuCeil;
use crate::sincos_reduce::rem2pif_any;

/// Bessel of the second kind of order 0 (Y0)
///
/// Max ULP 0.5
pub fn f_y0f(x: f32) -> f32 {
    let ux = x.to_bits();
    if ux >= 0xffu32 << 23 || ux == 0 {
        // |x| == 0, |x| == inf, |x| == NaN, x < 0
        if ux.wrapping_shl(1) == 0 {
            return f32::NEG_INFINITY;
        }

        if x.is_infinite() {
            if x.is_sign_negative() {
                return f32::NAN;
            }
            return 0.;
        }

        return x + f32::NAN; // x == NaN
    }

    let xb = x.to_bits();

    if xb <= 0x4296999au32 {
        // x <= 75.3
        if xb <= 0x40000000u32 {
            // x <= 2
            if xb <= 0x3faccccdu32 {
                // x <= 1.35
                return y0f_near_zero(f32::from_bits(xb));
            }
            // transient zone from 1.35 to 2 have bad behavior for log poly already,
            // and not yet good to be easily covered, thus it use its own poly
            return y0_transient_area(x);
        }

        return y0f_small_argument_path(f32::from_bits(xb));
    }

    // Exceptions:
    let xb = x.to_bits();
    if xb == 0x5023e87f {
        return f32::from_bits(0x28085b2d);
    } else if xb == 0x48171521 {
        return f32::from_bits(0x2bd244ba);
    } else if xb == 0x4398c299 {
        return f32::from_bits(0x32c730db);
    } else if xb == 0x7f0e5a38 {
        return f32::from_bits(0x131f680b);
    } else if xb == 0x6ef9be45 {
        return f32::from_bits(0x987d8a8f);
    }

    y0f_asympt(x)
}

/**
Generated by SageMath:
Evaluates:
Y0(x) = 2/pi*(euler_gamma + log(x/2))*J0(x) - sum((-1)^m*(x/2)^(2*m)/(m!)^2*sum(1+1/2 + ... 1/m))
expressed as:
Y0(x)=log(x)*W0(x) - Z0(x)
```python
from sage.all import *

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()
N = 10  # Number of terms (adjust as needed)
gamma = RealField(300)(euler_gamma)
d2 = RealField(300)(2)
pi = RealField(300).pi()

# Define J0(x) Taylor expansion at x = 0
def j_series(n, x):
    return sum([(-1)**m * (x/2)**(ZZ(n) + ZZ(2)*ZZ(m)) / (ZZ(m).factorial() * (ZZ(m) + ZZ(n)).factorial()) for m in range(N)])

J0_series = j_series(0, x)

def z_series(x):
    return sum([(-1)**m * (x/2)**(ZZ(2)*ZZ(m)) / ZZ(m).factorial()**ZZ(2) * sum(RealField(300)(1)/RealField(300)(k) for k in range(1, m+1)) for m in range(1, N)])

W0 = (d2/pi) * J0_series
Z0 = -gamma * (d2/pi) * J0_series + RealField(300)(2).log() * (d2/pi) * J0_series + (d2/pi) * z_series(x)

# see the series
print(W0)
print(Z0)
```
**/
#[inline]
fn y0f_near_zero(x: f32) -> f32 {
    const W: [u64; 10] = [
        0x3fe45f306dc9c883,
        0xbfc45f306dc9c883,
        0x3f845f306dc9c883,
        0xbf321bb945252402,
        0x3ed21bb945252402,
        0xbe672db9f21b0f5f,
        0x3df49a6c656d62ff,
        0xbd7ae90af76a4d0f,
        0x3cfae90af76a4d0f,
        0xbc754331c053fdad,
    ];
    let dx = x as f64;
    let x2 = dx * dx;
    let w0 = f_polyeval10(
        x2,
        f64::from_bits(W[0]),
        f64::from_bits(W[1]),
        f64::from_bits(W[2]),
        f64::from_bits(W[3]),
        f64::from_bits(W[4]),
        f64::from_bits(W[5]),
        f64::from_bits(W[6]),
        f64::from_bits(W[7]),
        f64::from_bits(W[8]),
        f64::from_bits(W[9]),
    );
    const Z: [u64; 10] = [
        0x3fb2e4d699cbd01f,
        0xbfc6bbcb41034286,
        0x3f9075b1bbf41364,
        0xbf41a6206b7b973d,
        0x3ee3e99794203bbd,
        0xbe7bce4a600d3ea4,
        0x3e0a6ee796b871b6,
        0xbd92393d82c6b2e4,
        0x3d131085da82054c,
        0xbc8f4ed4b492ebcc,
    ];
    let z0 = f_polyeval10(
        x2,
        f64::from_bits(Z[0]),
        f64::from_bits(Z[1]),
        f64::from_bits(Z[2]),
        f64::from_bits(Z[3]),
        f64::from_bits(Z[4]),
        f64::from_bits(Z[5]),
        f64::from_bits(Z[6]),
        f64::from_bits(Z[7]),
        f64::from_bits(Z[8]),
        f64::from_bits(Z[9]),
    );
    let w_log = fast_logf(x);
    f_fmla(w0, w_log, -z0) as f32
}

#[inline]
fn y0_transient_area(x: f32) -> f32 {
    let dx = x as f64;
    // first Y0 bessel zero
    const ZERO: DoubleDouble =
        DoubleDouble::from_bit_pair((0xbc8bd1e50d219bfd, 0x400193bed4dff243));
    let r = (dx - ZERO.hi) - ZERO.lo;
    /*
        Poly generated by Wolfram Matematica:
        <<FunctionApproximations`
        ClearAll["Global`*"]
        f[x_]:= BesselY[0,x + 2.1971413260310170351490335626990]
        {approx,error} = MiniMaxApproximation[f[x],{x,{ 1.35 - 2.1971413260310170351490335626990, 2 - 2.1971413260310170351490335626990 },17,0},WorkingPrecision->120]
        poly=error[[1]];
        coeffs=CoefficientList[poly,x];
        TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
    */
    let p = f_polyeval18(
        r,
        f64::from_bits(0x3fe0aa48442f8375),
        f64::from_bits(0x3de601d3b959b8d8),
        f64::from_bits(0xbfd0aa4840bb8529),
        f64::from_bits(0x3fa439fc16d4835e),
        f64::from_bits(0x3f80d2dcd97d2b4f),
        f64::from_bits(0x3f4f833368f9f047),
        f64::from_bits(0xbf541a702ee92277),
        f64::from_bits(0x3f3abc113cf0f4da),
        f64::from_bits(0xbefac1ded6f17ba8),
        f64::from_bits(0x3f33ef372e24df82),
        f64::from_bits(0x3f3bf8b42322df40),
        f64::from_bits(0x3f4582f9daec9ca7),
        f64::from_bits(0x3f479fc07175494e),
        f64::from_bits(0x3f4477a5e32b723a),
        f64::from_bits(0x3f39fbfd6a6d6f0c),
        f64::from_bits(0x3f2760a66816527b),
        f64::from_bits(0x3f0a68fdeeba224f),
        f64::from_bits(0x3edd78c6c87089e1),
    );
    p as f32
}

/// This method on small range searches for nearest zero or extremum.
/// Then picks stored series expansion at the point end evaluates the poly at the point.
#[inline]
fn y0f_small_argument_path(x: f32) -> f32 {
    let x_abs = x as f64;

    // let avg_step = 74.607799 / 47.0;
    // let inv_step = 1.0 / avg_step;

    const INV_STEP: f64 = 0.6299609508652038;

    let fx = x_abs * INV_STEP;
    const Y0_ZEROS_COUNT: f64 = (Y0_ZEROS.len() - 1) as f64;
    let idx0 = unsafe { fx.min(Y0_ZEROS_COUNT).to_int_unchecked::<usize>() };
    let idx1 = unsafe {
        fx.cpu_ceil()
            .min(Y0_ZEROS_COUNT)
            .to_int_unchecked::<usize>()
    };

    let found_zero0 = DoubleDouble::from_bit_pair(Y0_ZEROS[idx0]);
    let found_zero1 = DoubleDouble::from_bit_pair(Y0_ZEROS[idx1]);

    let dist0 = (found_zero0.hi - x_abs).abs();
    let dist1 = (found_zero1.hi - x_abs).abs();

    let (found_zero, idx, dist) = if dist0 < dist1 {
        (found_zero0, idx0, dist0)
    } else {
        (found_zero1, idx1, dist1)
    };

    if idx == 0 {
        // Really should not happen here, but if it is then to log expansion
        return y0f_near_zero(x);
    }

    // We hit exact zero, value, better to return it directly
    if dist == 0. {
        return f64::from_bits(Y0_ZEROS_VALUES[idx]) as f32;
    }

    let c = &Y0F_COEFFS[idx - 1];

    let r = (x_abs - found_zero.hi) - found_zero.lo;

    let p = f_polyeval18(
        r,
        f64::from_bits(c[0]),
        f64::from_bits(c[1]),
        f64::from_bits(c[2]),
        f64::from_bits(c[3]),
        f64::from_bits(c[4]),
        f64::from_bits(c[5]),
        f64::from_bits(c[6]),
        f64::from_bits(c[7]),
        f64::from_bits(c[8]),
        f64::from_bits(c[9]),
        f64::from_bits(c[10]),
        f64::from_bits(c[11]),
        f64::from_bits(c[12]),
        f64::from_bits(c[13]),
        f64::from_bits(c[14]),
        f64::from_bits(c[15]),
        f64::from_bits(c[16]),
        f64::from_bits(c[17]),
    );

    p as f32
}

/*
   Evaluates:
   Y0 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
*/
#[inline]
fn y0f_asympt(x: f32) -> f32 {
    let dx = x as f64;

    let alpha = j0f_asympt_alpha(dx);
    let beta = j0f_asympt_beta(dx);

    let angle = rem2pif_any(x);

    const SQRT_2_OVER_PI: f64 = f64::from_bits(0x3fe9884533d43651);
    const MPI_OVER_4: f64 = f64::from_bits(0xbfe921fb54442d18);

    let x0pi34 = MPI_OVER_4 - alpha;
    let r0 = angle + x0pi34;

    let m_cos = sin_small(r0);

    let z0 = beta * m_cos;
    let scale = SQRT_2_OVER_PI * j1f_rsqrt(dx);

    (scale * z0) as f32
}

#[cfg(test)]
mod tests {
    use crate::f_y0f;

    #[test]
    fn test_y0f() {
        assert_eq!(f_y0f(90.5), 0.08254846);
        assert_eq!(f_y0f(77.5), 0.087678276);
        assert_eq!(f_y0f(1.5), 0.3824489);
        assert_eq!(f_y0f(0.5), -0.44451874);
        assert!(f_y0f(-1.).is_nan());
        assert_eq!(f_y0f(0.), f32::NEG_INFINITY);
        assert_eq!(f_y0f(-0.), f32::NEG_INFINITY);
        assert_eq!(f_y0f(f32::INFINITY), 0.);
        assert!(f_y0f(f32::NEG_INFINITY).is_nan());
    }
}