use ndarray::{Array1, Array2, ArrayView1};
use wide::f64x4;
#[derive(Clone, Debug)]
pub struct ClosureFamily {
pub harmonics: usize,
pub window: f64,
}
#[inline]
fn recurrence_seed(phi: f64) -> (f64, f64, f64, f64) {
let (sh, ch) = (0.5 * phi).sin_cos();
let alpha = 2.0 * sh * sh; let beta = 2.0 * sh * ch; let cos_phi = ch * ch - sh * sh; (alpha, beta, cos_phi, beta)
}
impl ClosureFamily {
pub fn new(harmonics: usize, window: f64) -> Self {
Self { harmonics, window }
}
#[inline]
pub fn raw_dim(&self) -> usize {
1 + 2 * self.harmonics
}
#[inline]
fn write_row_jet(&self, s: f64, gamma: f64, value: &mut [f64], dg: &mut [f64], dgg: &mut [f64]) {
value[0] = 1.0;
if self.harmonics == 0 {
return;
}
let (alpha, beta, mut cs, mut sn) = recurrence_seed(gamma * s);
for m in 1..=self.harmonics {
let ms = m as f64 * s; let ci = 2 * m - 1;
let si = 2 * m;
value[ci] = cs;
dg[ci] = -sn * ms;
dgg[ci] = (-cs * ms) * ms;
value[si] = sn;
dg[si] = cs * ms;
dgg[si] = (-sn * ms) * ms;
let cn = cs - (alpha * cs + beta * sn);
let sn1 = sn - (alpha * sn - beta * cs);
cs = cn;
sn = sn1;
}
}
#[inline]
fn write_row_value(&self, s: f64, gamma: f64, value: &mut [f64]) {
value[0] = 1.0;
if self.harmonics == 0 {
return;
}
let (alpha, beta, mut cs, mut sn) = recurrence_seed(gamma * s);
for m in 1..=self.harmonics {
value[2 * m - 1] = cs;
value[2 * m] = sn;
let cn = cs - (alpha * cs + beta * sn);
let sn1 = sn - (alpha * sn - beta * cs);
cs = cn;
sn = sn1;
}
}
pub fn row_jet(&self, s: f64, gamma: f64) -> (Array1<f64>, Array1<f64>, Array1<f64>) {
let d = self.raw_dim();
let mut value = Array1::zeros(d);
let mut dg = Array1::zeros(d);
let mut dgg = Array1::zeros(d);
self.write_row_jet(
s,
gamma,
value.as_slice_mut().expect("contiguous"),
dg.as_slice_mut().expect("contiguous"),
dgg.as_slice_mut().expect("contiguous"),
);
(value, dg, dgg)
}
pub fn design(&self, s: ArrayView1<'_, f64>, gamma: f64) -> Array2<f64> {
let n = s.len();
let d = self.raw_dim();
let h = self.harmonics;
let mut phi = Array2::zeros((n, d));
let pv = phi.as_slice_mut().expect("contiguous design");
let mut i = 0;
if h > 0 {
while i + 4 <= n {
let s4 = [s[i], s[i + 1], s[i + 2], s[i + 3]];
let (alpha, beta, mut cc, mut sn) = seed_lanes(gamma, &s4);
for l in 0..4 {
pv[(i + l) * d] = 1.0;
}
for m in 1..=h {
let (ci, si) = (2 * m - 1, 2 * m);
let cca = cc.to_array();
let sna = sn.to_array();
for l in 0..4 {
let base = (i + l) * d;
pv[base + ci] = cca[l];
pv[base + si] = sna[l];
}
let cn = cc - (alpha * cc + beta * sn);
let sn1 = sn - (alpha * sn - beta * cc);
cc = cn;
sn = sn1;
}
i += 4;
}
}
while i < n {
self.write_row_value(s[i], gamma, &mut pv[i * d..i * d + d]);
i += 1;
}
phi
}
pub fn design_jet(
&self,
s: ArrayView1<'_, f64>,
gamma: f64,
) -> (Array2<f64>, Array2<f64>, Array2<f64>) {
let n = s.len();
let d = self.raw_dim();
let h = self.harmonics;
let mut phi = Array2::zeros((n, d));
let mut dphi = Array2::zeros((n, d));
let mut ddphi = Array2::zeros((n, d));
let pv = phi.as_slice_mut().expect("contiguous design");
let dv = dphi.as_slice_mut().expect("contiguous d/dγ");
let ddv = ddphi.as_slice_mut().expect("contiguous d²/dγ²");
let mut i = 0;
if h > 0 {
while i + 4 <= n {
let s4 = [s[i], s[i + 1], s[i + 2], s[i + 3]];
let (alpha, beta, mut cc, mut sn) = seed_lanes(gamma, &s4);
let svec = f64x4::from(s4);
for l in 0..4 {
pv[(i + l) * d] = 1.0;
}
for m in 1..=h {
let (ci, si) = (2 * m - 1, 2 * m);
let ms = svec * f64x4::splat(m as f64); let cca = cc.to_array();
let sna = sn.to_array();
let dgc = (-sn * ms).to_array();
let dgs = (cc * ms).to_array();
let ddc = ((-cc * ms) * ms).to_array();
let dds = ((-sn * ms) * ms).to_array();
for l in 0..4 {
let base = (i + l) * d;
pv[base + ci] = cca[l];
pv[base + si] = sna[l];
dv[base + ci] = dgc[l];
dv[base + si] = dgs[l];
ddv[base + ci] = ddc[l];
ddv[base + si] = dds[l];
}
let cn = cc - (alpha * cc + beta * sn);
let sn1 = sn - (alpha * sn - beta * cc);
cc = cn;
sn = sn1;
}
i += 4;
}
}
while i < n {
let lo = i * d;
self.write_row_jet(
s[i],
gamma,
&mut pv[lo..lo + d],
&mut dv[lo..lo + d],
&mut ddv[lo..lo + d],
);
i += 1;
}
(phi, dphi, ddphi)
}
}
#[inline]
fn seed_lanes(gamma: f64, s4: &[f64; 4]) -> (f64x4, f64x4, f64x4, f64x4) {
let mut al = [0.0; 4];
let mut be = [0.0; 4];
let mut ca = [0.0; 4];
let mut sa = [0.0; 4];
for l in 0..4 {
let (a, b, c, s) = recurrence_seed(gamma * s4[l]);
al[l] = a;
be[l] = b;
ca[l] = c;
sa[l] = s;
}
(
f64x4::from(al),
f64x4::from(be),
f64x4::from(ca),
f64x4::from(sa),
)
}
pub fn boundary_conductance(gamma: f64) -> (f64, f64, f64) {
let g = gamma.clamp(0.0, 1.0);
let c = 3.0 * g * g - 2.0 * g * g * g;
let cp = 6.0 * g - 6.0 * g * g;
let cpp = 6.0 - 12.0 * g;
(c, cp, cpp)
}
pub fn conductance_penalty_jet(
s_open: &Array2<f64>,
s_wrap: &Array2<f64>,
gamma: f64,
) -> (Array2<f64>, Array2<f64>, Array2<f64>) {
let (c, cp, cpp) = boundary_conductance(gamma);
let s = s_open + &(s_wrap * c);
let ds = s_wrap * cp;
let dds = s_wrap * cpp;
(s, ds, dds)
}
#[derive(Clone, Copy, Debug)]
pub struct ClosureProfileCi {
pub gamma_hat: f64,
pub ci_lo: f64,
pub ci_hi: f64,
pub ci_includes_circle: bool,
pub ci_includes_interval: bool,
pub singular_boundary: bool,
}
fn chi2_1_quantile(level: f64) -> f64 {
let z = inv_std_normal(0.5 * (1.0 + level));
z * z
}
pub fn profile_ci_from_grid(grid: &[(f64, f64)], level: f64) -> Result<ClosureProfileCi, String> {
if grid.len() < 2 {
return Err("closure profile CI needs at least two grid points".into());
}
let half_chi2 = 0.5 * chi2_1_quantile(level);
let (mut gamma_hat, mut v_min) = (grid[0].0, grid[0].1);
for &(g, v) in grid {
if !g.is_finite() || !v.is_finite() {
return Err("closure profile grid has non-finite entries".into());
}
if v < v_min {
v_min = v;
gamma_hat = g;
}
}
let in_set = |v: f64| v - v_min <= half_chi2 + 1e-12;
let mut ci_lo = gamma_hat;
let mut ci_hi = gamma_hat;
for w in grid.windows(2) {
let (g0, v0) = w[0];
let (g1, v1) = w[1];
let (a0, a1) = (in_set(v0), in_set(v1));
if a0 {
ci_lo = ci_lo.min(g0);
ci_hi = ci_hi.max(g0);
}
if a1 {
ci_lo = ci_lo.min(g1);
ci_hi = ci_hi.max(g1);
}
if a0 != a1 {
let target = v_min + half_chi2;
let t = ((target - v0) / (v1 - v0)).clamp(0.0, 1.0);
let g_cross = g0 + t * (g1 - g0);
ci_lo = ci_lo.min(g_cross);
ci_hi = ci_hi.max(g_cross);
}
}
ci_lo = ci_lo.clamp(0.0, 1.0);
ci_hi = ci_hi.clamp(0.0, 1.0);
let ci_includes_circle = ci_hi >= 1.0 - 1e-9;
let ci_includes_interval = ci_lo <= 1e-9;
let singular_boundary = gamma_hat <= 1e-9 && {
let interior = grid.iter().find(|&&(g, _)| g > 1e-9);
interior.map(|&(_, v)| v >= v_min - 1e-9).unwrap_or(false)
};
Ok(ClosureProfileCi {
gamma_hat,
ci_lo,
ci_hi,
ci_includes_circle,
ci_includes_interval,
singular_boundary,
})
}
pub(crate) fn inv_std_normal(p: f64) -> f64 {
if p <= 0.0 {
return f64::NEG_INFINITY;
}
if p >= 1.0 {
return f64::INFINITY;
}
const A: [f64; 6] = [
-3.969_683_028_665_376e1,
2.209_460_984_245_205e2,
-2.759_285_104_469_687e2,
1.383_577_518_672_690e2,
-3.066_479_806_614_716e1,
2.506_628_277_459_239e0,
];
const B: [f64; 5] = [
-5.447_609_879_822_406e1,
1.615_858_368_580_409e2,
-1.556_989_798_598_866e2,
6.680_131_188_771_972e1,
-1.328_068_155_288_572e1,
];
const C: [f64; 6] = [
-7.784_894_002_430_293e-3,
-3.223_964_580_411_365e-1,
-2.400_758_277_161_838e0,
-2.549_732_539_343_734e0,
4.374_664_141_464_968e0,
2.938_163_982_698_783e0,
];
const D: [f64; 4] = [
7.784_695_709_041_462e-3,
3.224_671_290_700_398e-1,
2.445_134_137_142_996e0,
3.754_408_661_907_416e0,
];
const P_LOW: f64 = 0.024_25;
let x = if p < P_LOW {
let q = (-2.0 * p.ln()).sqrt();
(((((C[0] * q + C[1]) * q + C[2]) * q + C[3]) * q + C[4]) * q + C[5])
/ ((((D[0] * q + D[1]) * q + D[2]) * q + D[3]) * q + 1.0)
} else if p <= 1.0 - P_LOW {
let q = p - 0.5;
let r = q * q;
(((((A[0] * r + A[1]) * r + A[2]) * r + A[3]) * r + A[4]) * r + A[5]) * q
/ (((((B[0] * r + B[1]) * r + B[2]) * r + B[3]) * r + B[4]) * r + 1.0)
} else {
let q = (-2.0 * (1.0 - p).ln()).sqrt();
-(((((C[0] * q + C[1]) * q + C[2]) * q + C[3]) * q + C[4]) * q + C[5])
/ ((((D[0] * q + D[1]) * q + D[2]) * q + D[3]) * q + 1.0)
};
let e = 0.5 * libm::erfc(-x / std::f64::consts::SQRT_2) - p;
let u = e * (2.0 * std::f64::consts::PI).sqrt() * (0.5 * x * x).exp();
x - u / (1.0 + 0.5 * x * u)
}
#[cfg(test)]
mod tests {
use super::*;
use ndarray::array;
#[test]
fn gamma_one_is_circle_basis() {
let fam = ClosureFamily::new(2, std::f64::consts::TAU);
let s = 1.3_f64;
let (v, _, _) = fam.row_jet(s, 1.0);
assert!((v[0] - 1.0).abs() < 1e-15);
assert!((v[1] - s.cos()).abs() < 1e-14);
assert!((v[2] - s.sin()).abs() < 1e-14);
assert!((v[3] - (2.0 * s).cos()).abs() < 1e-14);
assert!((v[4] - (2.0 * s).sin()).abs() < 1e-14);
}
#[test]
fn basis_gamma_jet_matches_fd() {
let fam = ClosureFamily::new(3, std::f64::consts::TAU);
let s = 0.8_f64;
for &g0 in &[1.0_f64, 0.5, 0.05, 1e-6] {
let (_, dg, dgg) = fam.row_jet(s, g0);
let h = 1e-5;
let (vp, _, _) = fam.row_jet(s, g0 + h);
let (vm, _, _) = fam.row_jet(s, g0 - h);
let (v0, _, _) = fam.row_jet(s, g0);
for j in 0..fam.raw_dim() {
let fd1 = (vp[j] - vm[j]) / (2.0 * h);
let fd2 = (vp[j] - 2.0 * v0[j] + vm[j]) / (h * h);
assert!(
(dg[j] - fd1).abs() < 1e-5,
"d/dγ col {j} at γ={g0}: analytic {} fd {fd1}",
dg[j]
);
assert!(
(dgg[j] - fd2).abs() < 1e-3,
"d²/dγ² col {j} at γ={g0}: analytic {} fd {fd2}",
dgg[j]
);
}
}
}
#[test]
fn conductance_endpoints_and_flat() {
let (c0, cp0, _) = boundary_conductance(0.0);
let (c1, cp1, _) = boundary_conductance(1.0);
assert!(c0.abs() < 1e-15 && (c1 - 1.0).abs() < 1e-15);
assert!(cp0.abs() < 1e-15 && cp1.abs() < 1e-15);
}
#[test]
fn conductance_penalty_interpolates() {
let s_open = array![[2.0, -1.0], [-1.0, 2.0]];
let s_wrap = array![[1.0, -1.0], [-1.0, 1.0]];
let (s0, _, _) = conductance_penalty_jet(&s_open, &s_wrap, 0.0);
let (s1, _, _) = conductance_penalty_jet(&s_open, &s_wrap, 1.0);
assert!((&s0 - &s_open).iter().all(|v| v.abs() < 1e-14));
let circle = &s_open + &s_wrap;
assert!((&s1 - &circle).iter().all(|v| v.abs() < 1e-14));
let g = 0.4;
let (_, ds, _) = conductance_penalty_jet(&s_open, &s_wrap, g);
let h = 1e-6;
let (sp, _, _) = conductance_penalty_jet(&s_open, &s_wrap, g + h);
let (sm, _, _) = conductance_penalty_jet(&s_open, &s_wrap, g - h);
let fd = (&sp - &sm).mapv(|v| v / (2.0 * h));
assert!((&ds - &fd).iter().all(|v| v.abs() < 1e-6));
}
#[test]
fn profile_ci_interior_minimum() {
let v = |g: f64| 100.0 + 50.0 * (g - 0.6).powi(2);
let grid: Vec<(f64, f64)> = (0..=100)
.map(|k| k as f64 / 100.0)
.map(|g| (g, v(g)))
.collect();
let ci = profile_ci_from_grid(&grid, 0.95).unwrap();
assert!((ci.gamma_hat - 0.6).abs() < 0.02, "γ̂ {}", ci.gamma_hat);
assert!(!ci.ci_includes_circle, "CI hi {}", ci.ci_hi);
assert!(!ci.ci_includes_interval, "CI lo {}", ci.ci_lo);
assert!(!ci.singular_boundary);
let want = (chi2_1_quantile(0.95) / (2.0 * 50.0)).sqrt();
assert!(((ci.ci_hi - ci.ci_lo) / 2.0 - want).abs() < 0.02);
}
#[test]
fn profile_ci_includes_circle_at_boundary() {
let v = |g: f64| 10.0 + 30.0 * (g - 1.05).powi(2); let grid: Vec<(f64, f64)> = (0..=100)
.map(|k| k as f64 / 100.0)
.map(|g| (g, v(g)))
.collect();
let ci = profile_ci_from_grid(&grid, 0.95).unwrap();
assert!(ci.ci_includes_circle);
assert!(!ci.singular_boundary);
}
#[test]
fn profile_flags_singular_boundary() {
let v = |g: f64| 10.0 + 20.0 * g; let grid: Vec<(f64, f64)> = (0..=100)
.map(|k| k as f64 / 100.0)
.map(|g| (g, v(g)))
.collect();
let ci = profile_ci_from_grid(&grid, 0.95).unwrap();
assert!((ci.gamma_hat).abs() < 1e-9);
assert!(ci.singular_boundary);
assert!(ci.ci_includes_interval);
}
#[test]
fn chi2_quantile_known_value() {
assert!((chi2_1_quantile(0.95) - 3.841_458_820_694_124).abs() < 1e-6);
}
#[derive(Clone, Copy)]
struct Dd {
hi: f64,
lo: f64,
}
fn two_sum(a: f64, b: f64) -> (f64, f64) {
let s = a + b;
let bb = s - a;
(s, (a - (s - bb)) + (b - bb))
}
fn two_prod(a: f64, b: f64) -> (f64, f64) {
let p = a * b;
(p, a.mul_add(b, -p))
}
fn quick_two_sum(a: f64, b: f64) -> (f64, f64) {
let s = a + b;
(s, b - (s - a))
}
impl Dd {
fn new(hi: f64) -> Dd {
Dd { hi, lo: 0.0 }
}
fn neg(self) -> Dd {
Dd { hi: -self.hi, lo: -self.lo }
}
fn add(self, o: Dd) -> Dd {
let (s, e) = two_sum(self.hi, o.hi);
let (h, l) = quick_two_sum(s, e + self.lo + o.lo);
Dd { hi: h, lo: l }
}
fn sub(self, o: Dd) -> Dd {
self.add(o.neg())
}
fn mul(self, o: Dd) -> Dd {
let (p, e) = two_prod(self.hi, o.hi);
let (h, l) = quick_two_sum(p, e + (self.hi * o.lo + self.lo * o.hi));
Dd { hi: h, lo: l }
}
fn mul_f(self, f: f64) -> Dd {
let (p, e) = two_prod(self.hi, f);
let (h, l) = quick_two_sum(p, e + self.lo * f);
Dd { hi: h, lo: l }
}
fn to_f64(self) -> f64 {
self.hi + self.lo
}
}
const DD_PIO2: Dd = Dd {
hi: 1.5707963267948966,
lo: 6.123233995736766e-17,
};
const DD_TWO_OVER_PI: f64 = 0.6366197723675814;
fn dd_sincos_small(r: Dd) -> (Dd, Dd) {
let x2 = r.mul(r);
let sin_coef: [f64; 8] = [
1.0,
-1.0 / 6.0,
1.0 / 120.0,
-1.0 / 5040.0,
1.0 / 362880.0,
-1.0 / 39916800.0,
1.0 / 6227020800.0,
-1.0 / 1307674368000.0,
];
let cos_coef: [f64; 8] = [
1.0,
-1.0 / 2.0,
1.0 / 24.0,
-1.0 / 720.0,
1.0 / 40320.0,
-1.0 / 3628800.0,
1.0 / 479001600.0,
-1.0 / 87178291200.0,
];
let mut sin = Dd::new(0.0);
let mut cos = Dd::new(0.0);
for k in (0..8).rev() {
sin = sin.mul(x2).add(Dd::new(sin_coef[k]));
cos = cos.mul(x2).add(Dd::new(cos_coef[k]));
}
(r.mul(sin), cos)
}
fn dd_sincos(x: Dd) -> (Dd, Dd) {
let kf = (x.hi * DD_TWO_OVER_PI).round();
let r = x.sub(DD_PIO2.mul_f(kf));
let (s, c) = dd_sincos_small(r);
match (kf as i64).rem_euclid(4) {
0 => (s, c),
1 => (c, s.neg()),
2 => (s.neg(), c.neg()),
_ => (c.neg(), s),
}
}
fn dd_arg(m: usize, gamma: f64, s: f64) -> Dd {
let (p, e) = two_prod(gamma, s);
Dd { hi: p, lo: e }.mul_f(m as f64)
}
#[test]
fn dd_reference_matches_libm_at_small_args() {
for &t in &[0.3_f64, 1.7, 5.5, 12.25, 123.4] {
let (s, c) = dd_sincos(Dd::new(t));
assert!((s.to_f64() - t.sin()).abs() < 1e-14, "sin {t}");
assert!((c.to_f64() - t.cos()).abs() < 1e-14, "cos {t}");
}
}
#[test]
fn recurrence_is_at_least_as_accurate_as_per_harmonic_libm() {
let mut seed: u64 = 0x1234_5678_9abc_def0;
let mut rng = || {
seed ^= seed << 13;
seed ^= seed >> 7;
seed ^= seed << 17;
(seed >> 11) as f64 / (1u64 << 53) as f64
};
for &h in &[4usize, 8, 16, 32, 64, 128] {
let fam = ClosureFamily::new(h, std::f64::consts::TAU);
let mut max_old = 0.0f64;
let mut max_new = 0.0f64;
for _ in 0..2000 {
let s = (rng() * 2.0 - 1.0) * std::f64::consts::TAU;
let gamma = rng();
let (val, dg, dgg) = fam.row_jet(s, gamma);
for m in 1..=h {
let (ts, tc) = dd_sincos(dd_arg(m, gamma, s));
let (tcf, tsf) = (tc.to_f64(), ts.to_f64());
let ms = m as f64 * s;
let (cs_new, sn_new) = (val[2 * m - 1], val[2 * m]);
let (osn, ocs) = (gamma * ms).sin_cos();
max_old = max_old.max((ocs - tcf).abs().max((osn - tsf).abs()));
max_new = max_new.max((cs_new - tcf).abs().max((sn_new - tsf).abs()));
assert_eq!(dg[2 * m - 1], -sn_new * ms);
assert_eq!(dg[2 * m], cs_new * ms);
assert_eq!(dgg[2 * m - 1], (-cs_new * ms) * ms);
assert_eq!(dgg[2 * m], (-sn_new * ms) * ms);
}
}
assert!(
max_new <= 1.1 * max_old,
"H={h}: recurrence abs-err {max_new:.3e} worse than per-harmonic libm {max_old:.3e}"
);
assert!(max_new < 1e-12, "H={h}: recurrence abs-err {max_new:.3e} exceeds 1e-12");
}
}
#[test]
fn simd_design_is_bit_identical_to_scalar_rows() {
for &h in &[0usize, 1, 3, 7, 16] {
let fam = ClosureFamily::new(h, std::f64::consts::TAU);
let n = 11;
let s: Vec<f64> = (0..n).map(|k| (k as f64) * 0.37 - 1.9).collect();
let sv = ndarray::ArrayView1::from(&s);
let gamma = 0.61;
let phi = fam.design(sv, gamma);
let (pj, dj, ddj) = fam.design_jet(sv, gamma);
for (i, &si) in s.iter().enumerate() {
let (v, dgr, ddr) = fam.row_jet(si, gamma);
for j in 0..fam.raw_dim() {
assert_eq!(phi[[i, j]].to_bits(), v[j].to_bits(), "design v ({i},{j})");
assert_eq!(pj[[i, j]].to_bits(), v[j].to_bits(), "design_jet v ({i},{j})");
assert_eq!(dj[[i, j]].to_bits(), dgr[j].to_bits(), "design_jet dγ ({i},{j})");
assert_eq!(ddj[[i, j]].to_bits(), ddr[j].to_bits(), "design_jet d²γ ({i},{j})");
}
}
}
}
}