gam_gpu/
numerics_host.rs

1//! Host-side scalar special functions shared by the CPU parity references of
2//! the GPU backends.
3//!
4//! The CUDA kernels emit their own NVRTC-visible numerics (see
5//! [`crate::numerics_device`]); this module is the matching **host** side
6//! used by the CPU parity oracles (`bms_flex_row`'s test oracle) and the
7//! CPU reference path (`pirls_row`'s probit CDF). Keeping a single definition
8//! here means the host `erfc` cannot drift between backends.
9
10/// Complementary error function `erfc(x) = 1 − erf(x)` evaluated on the host.
11///
12/// Routes to `libm::erfc`, the SunOS msun double-precision implementation
13/// (accurate to within ~1 ulp across the entire real line). The CUDA kernel
14/// side calls device `erfc`, which is itself msun-derived, so the host CPU
15/// reference matches the device path to within a ULP. The previous
16/// branchless Cody 1969 Chebyshev rational here was only ~1.2e-7 accurate
17/// in relative terms; that ate seven digits of every probit `Mills =
18/// φ/Φ = pdf / (½·erfc(-x/√2))` evaluation and made any sufficiently
19/// tight finite-difference probe of `∂neglog/∂e = -w·s·Mills` (which the
20/// analytic side computes from this same `cdf`, while the FD side
21/// differences `log cdf` and cancels the erfc bias) break against itself
22/// at the ~2e-7 floor instead of the genuine 5-point-stencil truncation
23/// floor near 1e-12.
24pub fn erfc(x: f64) -> f64 {
25    libm::erfc(x)
26}
gam_gpu/numerics_host.rs

gam_gpu/
numerics_host.rs