numr 0.5.1

High-performance numerical computing with multi-backend GPU acceleration (CPU/CUDA/WebGPU)
Documentation
//! Error function implementations (erf, erfc, erfinv)

// ============================================================================
// Error Function Implementation
// ============================================================================

/// Compute erf(x) to full f64 precision.
///
/// Uses Maclaurin series for small |x| and Laplace continued fraction
/// for erfc at larger |x|. Both are mathematically guaranteed to converge.
/// Accuracy: ~1e-15 relative error (full f64 precision).
pub fn erf_scalar(x: f64) -> f64 {
    if x.is_nan() {
        return f64::NAN;
    }
    if x == 0.0 {
        return 0.0;
    }
    if x.is_infinite() {
        return if x > 0.0 { 1.0 } else { -1.0 };
    }

    let sign = if x < 0.0 { -1.0 } else { 1.0 };
    let a = x.abs();

    if a < 3.0 {
        // Maclaurin series: erf(x) = (2/sqrt(pi)) * sum_{n=0}^inf (-1)^n * x^(2n+1) / (n! * (2n+1))
        // Converges well for |x| < 3 with ~30 terms
        let x2 = a * a;
        let mut term = a; // first term: x^1 / (0! * 1) = x
        let mut sum = a;
        for n in 1..50 {
            term *= -x2 / (n as f64);
            let contribution = term / (2 * n + 1) as f64;
            sum += contribution;
            if contribution.abs() < sum.abs() * 1e-16 {
                break;
            }
        }
        const TWO_OVER_SQRT_PI: f64 = std::f64::consts::FRAC_2_SQRT_PI;
        sign * sum * TWO_OVER_SQRT_PI
    } else if a < 6.0 {
        // Laplace continued fraction for erfc(x):
        // erfc(x) = exp(-x^2)/sqrt(pi) * 1/(x + 0.5/(x + 1/(x + 1.5/(x + ...))))
        // Evaluate from the tail using backward recurrence
        let x2 = a * a;
        let n_terms = 50;
        let mut f = 0.0_f64;
        for n in (1..=n_terms).rev() {
            f = (n as f64) * 0.5 / (a + f);
        }
        let cf = 1.0 / (a + f);
        const FRAC_1_SQRT_PI: f64 = 0.5641895835477563; // 1/sqrt(pi)
        let erfc_val = (-x2).exp() * FRAC_1_SQRT_PI * cf;
        sign * (1.0 - erfc_val)
    } else {
        // Very large |x|: erf(x) = ±1 (erfc < 2e-17)
        sign
    }
}

/// Compute erfc(x) = 1 - erf(x) directly for numerical stability.
pub fn erfc_scalar(x: f64) -> f64 {
    if x >= 0.0 {
        1.0 - erf_scalar(x)
    } else {
        1.0 + erf_scalar(-x)
    }
}

// ============================================================================
// Inverse Error Function
// ============================================================================

/// Compute erfinv(x) using the relationship with the normal quantile function.
///
/// Uses: erfinv(x) = ndtri((1+x)/2) / sqrt(2)
/// where ndtri is the inverse of the standard normal CDF.
///
/// The ndtri approximation uses the Acklam algorithm
/// with Halley refinement for high accuracy.
///
/// Accuracy: ~1e-12 relative error.
pub fn erfinv_scalar(p: f64) -> f64 {
    if p <= -1.0 {
        return f64::NEG_INFINITY;
    }
    if p >= 1.0 {
        return f64::INFINITY;
    }
    if p == 0.0 {
        return 0.0;
    }
    if p.is_nan() {
        return f64::NAN;
    }

    // erfinv(x) = ndtri((1+x)/2) / sqrt(2)
    // We compute ndtri directly using Beasley-Springer-Moro algorithm
    let prob = (1.0 + p) / 2.0; // Map from (-1,1) to (0,1)

    let ndtri_result = ndtri_scalar(prob);

    ndtri_result * std::f64::consts::FRAC_1_SQRT_2
}

/// Inverse of the standard normal CDF (quantile function).
///
/// Uses the Acklam algorithm (Peter J. Acklam, 2010) which provides
/// high accuracy across the entire range.
fn ndtri_scalar(p: f64) -> f64 {
    if p <= 0.0 {
        return f64::NEG_INFINITY;
    }
    if p >= 1.0 {
        return f64::INFINITY;
    }
    if p == 0.5 {
        return 0.0;
    }

    // Acklam's algorithm coefficients
    // Rational approximation for the lower region
    const A: [f64; 6] = [
        -3.969683028665376e+01,
        2.209460984245205e+02,
        -2.759285104469687e+02,
        1.383577518672690e+02,
        -3.066479806614716e+01,
        2.506628277459239e+00,
    ];
    const B: [f64; 5] = [
        -5.447609879822406e+01,
        1.615858368580409e+02,
        -1.556989798598866e+02,
        6.680131188771972e+01,
        -1.328068155288572e+01,
    ];

    // Rational approximation for the central region
    const C: [f64; 6] = [
        -7.784894002430293e-03,
        -3.223964580411365e-01,
        -2.400758277161838e+00,
        -2.549732539343734e+00,
        4.374664141464968e+00,
        2.938163982698783e+00,
    ];
    const D: [f64; 4] = [
        7.784695709041462e-03,
        3.224671290700398e-01,
        2.445134137142996e+00,
        3.754408661907416e+00,
    ];

    // Break-points for the regions
    const P_LOW: f64 = 0.02425;
    const P_HIGH: f64 = 1.0 - P_LOW;

    let mut x = if p < P_LOW {
        // Lower region
        let q = (-2.0 * p.ln()).sqrt();
        (((((C[0] * q + C[1]) * q + C[2]) * q + C[3]) * q + C[4]) * q + C[5])
            / ((((D[0] * q + D[1]) * q + D[2]) * q + D[3]) * q + 1.0)
    } else if p <= P_HIGH {
        // Central region
        let q = p - 0.5;
        let r = q * q;
        (((((A[0] * r + A[1]) * r + A[2]) * r + A[3]) * r + A[4]) * r + A[5]) * q
            / (((((B[0] * r + B[1]) * r + B[2]) * r + B[3]) * r + B[4]) * r + 1.0)
    } else {
        // Upper region
        let q = (-2.0 * (1.0 - p).ln()).sqrt();
        -(((((C[0] * q + C[1]) * q + C[2]) * q + C[3]) * q + C[4]) * q + C[5])
            / ((((D[0] * q + D[1]) * q + D[2]) * q + D[3]) * q + 1.0)
    };

    // Halley refinement for improved accuracy
    // f(x) = Phi(x) - p
    // f'(x) = phi(x)
    const SQRT_2PI: f64 = 2.5066282746310002;

    for _ in 0..3 {
        let phi_x = (-0.5 * x * x).exp() / SQRT_2PI;

        if phi_x < 1e-300 {
            break;
        }

        let cdf_x = 0.5 * (1.0 + erf_scalar(x * std::f64::consts::FRAC_1_SQRT_2));
        let err = cdf_x - p;

        if err.abs() < 1e-15 {
            break;
        }

        let step = err / phi_x;
        let halley_factor = 1.0 + 0.5 * x * step;

        if halley_factor.abs() > 0.5 {
            x -= step / halley_factor;
        } else {
            x -= step;
        }
    }

    x
}