pxfm 0.1.28

Fast and accurate math
Documentation
/*
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::bessel::j0f::j1f_rsqrt;
use crate::bessel::j1f::{j1f_asympt_alpha, j1f_asympt_beta};
use crate::bessel::trigo_bessel::cos_small;
use crate::bessel::y1f_coeffs::{Y1_ZEROS, Y1_ZEROS_VALUES, Y1F_COEFFS};
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::logs::fast_logf;
use crate::polyeval::{f_polyeval10, f_polyeval18, f_polyeval19};
use crate::rounding::CpuCeil;
use crate::sincos_reduce::rem2pif_any;

/// Bessel of the second kind of order 1 (Y1)
///
/// Max ULP 0.5
pub fn f_y1f(x: f32) -> f32 {
    let ux = x.to_bits();
    if ux >= 0xffu32 << 23 || ux == 0 {
        // |x| == 0, |x| == inf, |x| == NaN, x < 0
        if ux.wrapping_shl(1) == 0 {
            // |x| == 0
            return f32::NEG_INFINITY;
        }

        if x.is_infinite() {
            if x.is_sign_negative() {
                return f32::NAN;
            }
            return 0.;
        }
        return x + f32::NAN; // x == NaN
    }

    let xb = x.to_bits();

    if xb <= 0x424e0000u32 {
        // x <= 51.5
        if xb <= 0x40000000u32 {
            // x <= 2
            if xb <= 0x3fb5c28fu32 {
                // x <= 1.42
                return y1f_near_zero(x);
            }
            // transient zone from 1.42 to 2 have bad behavior for log poly already,
            // and not yet good to be easily covered, thus it use its own poly
            return y1_transient_area(x);
        }
        return y1f_small_argument_path(x);
    }

    // Exceptions
    let bx = x.to_bits();
    if bx == 0x47037a3d {
        return f32::from_bits(0x2deededb);
    } else if bx == 0x65ce46e4 {
        return f32::from_bits(0x9eed85c4);
    } else if bx == 0x6bf68a7b {
        return f32::from_bits(0x9dc70a09);
    } else if bx == 0x76d84625 {
        return f32::from_bits(0x15d7a68b);
    } else if bx == 0x7e3dcda0 {
        return f32::from_bits(0x12b81111);
    }

    y1f_asympt(x)
}

/**
Generated by SageMath:
Evaluates:
y2 = -J1(x)*log(x) + 1/x * (1 - sum((-1)^m*(H(m)+H(m-1))/(2^m*m!*(m-1)!)*x^(2*m))
Y1(x) = 2/pi*(-y2(x)+(euler_gamma - log(2))*J1(x))
expressed as:
Y1(x)=log(x)*W1(x) - Z1(x) - 2/(pi*x)
```python
from sage.all import *

R = LaurentSeriesRing(RealField(300), 'x',default_prec=300)
x = R.gen()
N = 16  # Number of terms (adjust as needed)
gamma = RealField(300)(euler_gamma)
d2 = RealField(300)(2)
pi = RealField(300).pi()
log2 = RealField(300)(2).log()

def j_series(n, x):
    return sum([(-1)**m * (x/2)**(ZZ(n) + ZZ(2)*ZZ(m)) / (ZZ(m).factorial() * (ZZ(m) + ZZ(n)).factorial()) for m in range(N)])

J1_series = j_series(1, x)

def harmony(m):
    return sum(RealField(300)(1)/RealField(300)(k) for k in range(1, m+1))

def z_series(x):
    return sum([(-1)**m * (x)**(ZZ(2)*ZZ(m)) / (ZZ(2)**(2*m) * ZZ(m).factorial() * (ZZ(m) - ZZ(1)).factorial()) * (harmony(m) + harmony(m - 1)) for m in range(1, N)])

W1 = d2/pi * J1_series
Z1 = -(d2/(x*pi) * z_series(x) + d2/pi * gamma * J1_series(x) - d2/pi * log2 * J1_series(x))

# see the series
print(W0)
print(Z0)
```
See ./notes/bessel_y1_taylor.ipynb for generation
**/
#[inline]
fn y1f_near_zero(x: f32) -> f32 {
    const W: [u64; 10] = [
        0x3fd45f306dc9c883,
        0xbfa45f306dc9c883,
        0x3f5b2995e7b7b604,
        0xbf021bb945252402,
        0x3e9cf9286ea1d337,
        0xbe2ee7a29824147f,
        0x3db78be9987d036d,
        0xbd3ae90af76a4d0f,
        0x3cb7eb97f85e7d62,
        0xbc31028e3376648a,
    ];
    let dx = x as f64;
    let x2 = dx * dx;
    let w0 = f_polyeval10(
        x2,
        f64::from_bits(W[0]),
        f64::from_bits(W[1]),
        f64::from_bits(W[2]),
        f64::from_bits(W[3]),
        f64::from_bits(W[4]),
        f64::from_bits(W[5]),
        f64::from_bits(W[6]),
        f64::from_bits(W[7]),
        f64::from_bits(W[8]),
        f64::from_bits(W[9]),
    ) * dx;
    const Z: [u64; 10] = [
        0x3fc91866143cbc8a,
        0xbfabd3975c75b4a7,
        0x3f6835b97894be5b,
        0xbf12c7dbffcde97d,
        0x3eb0a780ac776eac,
        0xbe432e5a4ddeea30,
        0x3dcf0ce34d2066a6,
        0xbd52a4e1aea45c18,
        0x3cd1474ade9154ac,
        0xbc4978ba84f218c0,
    ];
    let z0 = f_polyeval10(
        x2,
        f64::from_bits(Z[0]),
        f64::from_bits(Z[1]),
        f64::from_bits(Z[2]),
        f64::from_bits(Z[3]),
        f64::from_bits(Z[4]),
        f64::from_bits(Z[5]),
        f64::from_bits(Z[6]),
        f64::from_bits(Z[7]),
        f64::from_bits(Z[8]),
        f64::from_bits(Z[9]),
    ) * dx;
    let w_log = fast_logf(x);
    const TWO_OVER_PI: f64 = f64::from_bits(0x3fe45f306dc9c883);
    let recip = 1. / dx;
    let z = f_fmla(w0, w_log, -z0);
    f_fmla(recip, -TWO_OVER_PI, z) as f32
}

#[inline]
fn y1_transient_area(x: f32) -> f32 {
    let dx = x as f64;
    // first Y0 bessel zero
    const ZERO: DoubleDouble =
        DoubleDouble::from_bit_pair((0xbc8bd1e50d219bfd, 0x400193bed4dff243));
    let r = (dx - ZERO.hi) - ZERO.lo;
    /*
        Poly generated by Wolfram Matematica:
        <<FunctionApproximations`
        ClearAll["Global`*"]
        f[x_]:= BesselY[1,x + 2.1971413260310170351490335626990]
        {approx,error} = MiniMaxApproximation[f[x],{x,{1.42 - 2.1971413260310170351490335626990, 2 - 2.1971413260310170351490335626990 },17,0},WorkingPrecision->120]
        poly=error[[1]];
        coeffs=CoefficientList[poly,x];
        TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50}, ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
    */
    let p = f_polyeval18(
        r,
        f64::from_bits(0x3d9b15a8283b069b),
        f64::from_bits(0x3fe0aa484455fd09),
        f64::from_bits(0xbfbe56f80802fa38),
        f64::from_bits(0xbfa0d2ac9d0409ad),
        f64::from_bits(0xbf73a619b3551650),
        f64::from_bits(0x3f7e6c480057ecbb),
        f64::from_bits(0xbf650dc773a5df4d),
        f64::from_bits(0x3f531e9ccab7d4da),
        f64::from_bits(0xbf29b76999169b0e),
        f64::from_bits(0x3f509c829abceaf7),
        f64::from_bits(0x3f575aee5697c4d8),
        f64::from_bits(0x3f63f7f9598be176),
        f64::from_bits(0x3f67a6ae61541282),
        f64::from_bits(0x3f665e6d3de19021),
        f64::from_bits(0x3f5ee8837b9197f6),
        f64::from_bits(0x3f4e6924f270fd7e),
        f64::from_bits(0x3f32ca61e5b74925),
        f64::from_bits(0x3f0725735bc3890b),
    );
    p as f32
}

/// This method on small range searches for nearest zero or extremum.
/// Then picks stored series expansion at the point end evaluates the poly at the point.
#[inline]
fn y1f_small_argument_path(x: f32) -> f32 {
    let x_abs = x as f64;

    // let avg_step = 51.03 / 33.0;
    // let inv_step = 1.0 / avg_step;
    //
    // println!("inv_step {}", inv_step);

    const INV_STEP: f64 = 0.6466784244562023;

    let fx = x_abs * INV_STEP;
    const Y1_ZEROS_COUNT: f64 = (Y1_ZEROS.len() - 1) as f64;
    let idx0 = unsafe { fx.min(Y1_ZEROS_COUNT).to_int_unchecked::<usize>() };
    let idx1 = unsafe {
        fx.cpu_ceil()
            .min(Y1_ZEROS_COUNT)
            .to_int_unchecked::<usize>()
    };

    let found_zero0 = DoubleDouble::from_bit_pair(Y1_ZEROS[idx0]);
    let found_zero1 = DoubleDouble::from_bit_pair(Y1_ZEROS[idx1]);

    let dist0 = (found_zero0.hi - x_abs).abs();
    let dist1 = (found_zero1.hi - x_abs).abs();

    let (found_zero, idx, dist) = if dist0 < dist1 {
        (found_zero0, idx0, dist0)
    } else {
        (found_zero1, idx1, dist1)
    };

    if idx == 0 {
        // Really should not happen here, but if it is then to log expansion
        return y1f_near_zero(x);
    }

    // We hit exact zero, value, better to return it directly
    if dist == 0. {
        return f64::from_bits(Y1_ZEROS_VALUES[idx]) as f32;
    }

    let c = &Y1F_COEFFS[idx - 1];

    let r = (x_abs - found_zero.hi) - found_zero.lo;

    let p = f_polyeval19(
        r,
        f64::from_bits(c[0]),
        f64::from_bits(c[1]),
        f64::from_bits(c[2]),
        f64::from_bits(c[3]),
        f64::from_bits(c[4]),
        f64::from_bits(c[5]),
        f64::from_bits(c[6]),
        f64::from_bits(c[7]),
        f64::from_bits(c[8]),
        f64::from_bits(c[9]),
        f64::from_bits(c[10]),
        f64::from_bits(c[11]),
        f64::from_bits(c[12]),
        f64::from_bits(c[13]),
        f64::from_bits(c[14]),
        f64::from_bits(c[15]),
        f64::from_bits(c[16]),
        f64::from_bits(c[17]),
        f64::from_bits(c[18]),
    );

    p as f32
}

/*
   Evaluates:
   Y1 = sqrt(2/(PI*x)) * beta(x) * sin(x - 3*PI/4 - alpha(x))

   Discarding 1/2*PI gives:
   Y1 = sqrt(2/(PI*x)) * beta(x) * (-cos(x - PI/4 - alpha(x)))
*/
#[inline]
fn y1f_asympt(x: f32) -> f32 {
    let dx = x as f64;

    let alpha = j1f_asympt_alpha(dx);
    let beta = j1f_asympt_beta(dx);

    let angle = rem2pif_any(x);

    const SQRT_2_OVER_PI: f64 = f64::from_bits(0x3fe9884533d43651);
    const MPI_OVER_4: f64 = f64::from_bits(0xbfe921fb54442d18);

    let x0pi34 = MPI_OVER_4 - alpha;
    let r0 = angle + x0pi34;

    let m_cos = -cos_small(r0);

    let z0 = beta * m_cos;
    let scale = SQRT_2_OVER_PI * j1f_rsqrt(dx);

    (scale * z0) as f32
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_bessel_zero() {
        assert_eq!(f_y1f(700.76), 0.024876066);
        assert_eq!(f_y1f(35.76), 0.121432826);
        assert_eq!(f_y1f(1.76), -0.24787569);
        assert_eq!(f_y1f(0.87), -0.9030042);
        assert_eq!(f_y1f(f32::INFINITY), 0.0);
        assert!(f_y1f(f32::NEG_INFINITY).is_nan());
        assert!(f_y1f(f32::NAN).is_nan());
    }
}