1use crate::astro::math::special::erf;
45
46#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
48pub enum NormalityError {
49 #[error("residual set has a non-finite value")]
51 NonFinite,
52 #[error("residual set too small: need at least {need} values, got {got}")]
55 InsufficientData {
56 need: usize,
58 got: usize,
60 },
61 #[error("residual set has zero variance")]
64 ZeroVariance,
65 #[error("residual set has zero range")]
68 ZeroRange,
69}
70
71#[derive(Debug, Clone, Copy, PartialEq)]
73pub struct MomentStats {
74 pub mean: f64,
76 pub variance: f64,
79 pub skewness: f64,
82 pub kurtosis_excess: f64,
87}
88
89fn central_moments(x: &[f64]) -> Result<(usize, f64, f64, f64, f64), NormalityError> {
92 let n = x.len();
93 if n == 0 {
94 return Err(NormalityError::InsufficientData { need: 1, got: 0 });
95 }
96 for &v in x {
97 if !v.is_finite() {
98 return Err(NormalityError::NonFinite);
99 }
100 }
101 let mut sum = 0.0;
102 for &v in x {
103 sum += v;
104 }
105 let mean = sum / n as f64;
106 let (mut s2, mut s3, mut s4) = (0.0, 0.0, 0.0);
107 for &v in x {
108 let d = v - mean;
109 let d2 = d * d;
110 s2 += d2;
111 s3 += d2 * d;
112 s4 += d2 * d2;
113 }
114 let inv_n = 1.0 / n as f64;
115 Ok((n, mean, s2 * inv_n, s3 * inv_n, s4 * inv_n))
116}
117
118pub fn skewness(x: &[f64], bias: bool) -> Result<f64, NormalityError> {
125 let (n, _mean, m2, m3, _m4) = central_moments(x)?;
126 #[allow(clippy::neg_cmp_op_on_partial_ord)]
127 if !(m2 > 0.0) {
128 return Err(NormalityError::ZeroVariance);
129 }
130 let g1 = m3 / m2.powf(1.5);
131 if bias {
132 return Ok(g1);
133 }
134 if n < 3 {
135 return Err(NormalityError::InsufficientData { need: 3, got: n });
136 }
137 let nf = n as f64;
138 Ok(((nf - 1.0) * nf).sqrt() / (nf - 2.0) * g1)
139}
140
141pub fn kurtosis(x: &[f64], fisher: bool, bias: bool) -> Result<f64, NormalityError> {
149 let (n, _mean, m2, _m3, m4) = central_moments(x)?;
150 #[allow(clippy::neg_cmp_op_on_partial_ord)]
151 if !(m2 > 0.0) {
152 return Err(NormalityError::ZeroVariance);
153 }
154 let mut vals = m4 / (m2 * m2);
155 if !bias {
156 if n < 4 {
157 return Err(NormalityError::InsufficientData { need: 4, got: n });
158 }
159 let nf = n as f64;
160 vals = 1.0 / (nf - 2.0) / (nf - 3.0) * ((nf * nf - 1.0) * vals - 3.0 * (nf - 1.0).powi(2))
161 + 3.0;
162 }
163 Ok(if fisher { vals - 3.0 } else { vals })
164}
165
166pub fn moments(x: &[f64], fisher: bool, bias: bool) -> Result<MomentStats, NormalityError> {
172 let (_n, mean, m2, _m3, _m4) = central_moments(x)?;
173 Ok(MomentStats {
174 mean,
175 variance: m2,
176 skewness: skewness(x, bias)?,
177 kurtosis_excess: kurtosis(x, fisher, bias)?,
178 })
179}
180
181#[derive(Debug, Clone, Copy, PartialEq)]
183pub struct JarqueBera {
184 pub statistic: f64,
186 pub p_value: f64,
188}
189
190pub fn jarque_bera(x: &[f64]) -> Result<JarqueBera, NormalityError> {
196 let (n, _mean, m2, m3, m4) = central_moments(x)?;
197 if n < 2 {
198 return Err(NormalityError::InsufficientData { need: 2, got: n });
199 }
200 #[allow(clippy::neg_cmp_op_on_partial_ord)]
201 if !(m2 > 0.0) {
202 return Err(NormalityError::ZeroVariance);
203 }
204 let s = m3 / m2.powf(1.5);
205 let k = m4 / (m2 * m2) - 3.0;
206 let statistic = n as f64 / 6.0 * (s * s + k * k / 4.0);
207 let p_value = (-statistic / 2.0).exp();
208 Ok(JarqueBera { statistic, p_value })
209}
210
211#[derive(Debug, Clone, Copy, PartialEq)]
213pub struct ShapiroWilk {
214 pub w: f64,
217 pub p_value: f64,
219}
220
221const SW_C1: [f64; 6] = [
224 0.,
225 0.221157,
226 -0.147981,
227 -0.207119e1,
228 0.4434685e1,
229 -0.2706056e1,
230];
231const SW_C2: [f64; 6] = [
232 0.,
233 0.42981e-1,
234 -0.293762,
235 -0.1752461e1,
236 0.5682633e1,
237 -0.3582633e1,
238];
239const SW_C3: [f64; 4] = [0.5440, -0.39978, 0.25054e-1, -0.6714e-3];
240const SW_C4: [f64; 4] = [0.13822e1, -0.77857, 0.62767e-1, -0.20322e-2];
241const SW_C5: [f64; 4] = [-0.15861e1, -0.31082, -0.83751e-1, 0.38915e-2];
242const SW_C6: [f64; 3] = [-0.4803, -0.82676e-1, 0.30302e-2];
243const SW_G: [f64; 2] = [-0.2273e1, 0.459];
244const SW_SMALL: f64 = 1e-19;
245
246fn sw_poly(c: &[f64], x: f64) -> f64 {
249 let nord = c.len();
250 let mut res = c[0];
251 if nord == 1 {
252 return res;
253 }
254 let mut p = x * c[nord - 1];
255 for j in (1..nord - 1).rev() {
256 p = (p + c[j]) * x;
257 }
258 res += p;
259 res
260}
261
262fn sw_alnorm(x: f64, upper: bool) -> f64 {
265 let phi_upper = 0.5 * (1.0 - erf(x / std::f64::consts::SQRT_2));
268 if upper {
269 phi_upper
270 } else {
271 1.0 - phi_upper
272 }
273}
274
275fn sw_ppnd(p: f64) -> f64 {
278 const A0: f64 = 2.50662823884;
279 const A1: f64 = -18.61500062529;
280 const A2: f64 = 41.39119773534;
281 const A3: f64 = -25.44106049637;
282 const B1: f64 = -8.47351093090;
283 const B2: f64 = 23.08336743743;
284 const B3: f64 = -21.06224101826;
285 const B4: f64 = 3.13082909833;
286 const C0: f64 = -2.78718931138;
287 const C1: f64 = -2.29796479134;
288 const C2: f64 = 4.85014127135;
289 const C3: f64 = 2.32121276858;
290 const D1: f64 = 3.54388924762;
291 const D2: f64 = 1.63706781897;
292 const SPLIT: f64 = 0.42;
293
294 let q = p - 0.5;
295 if q.abs() <= SPLIT {
296 let r = q * q;
297 let temp = q * (((A3 * r + A2) * r + A1) * r + A0);
298 return temp / ((((B4 * r + B3) * r + B2) * r + B1) * r + 1.0);
299 }
300 let mut r = if q > 0.0 { 1.0 - p } else { p };
301 if r > 0.0 {
302 r = (-r.ln()).sqrt();
303 } else {
304 return 0.0;
305 }
306 let temp = (((C3 * r + C2) * r + C1) * r + C0) / ((D2 * r + D1) * r + 1.0);
307 if q < 0.0 {
308 -temp
309 } else {
310 temp
311 }
312}
313
314#[allow(clippy::needless_range_loop)]
323pub fn shapiro_wilk(x: &[f64]) -> Result<ShapiroWilk, NormalityError> {
324 let n = x.len();
325 if n < 3 {
326 return Err(NormalityError::InsufficientData { need: 3, got: n });
327 }
328 for &v in x {
329 if !v.is_finite() {
330 return Err(NormalityError::NonFinite);
331 }
332 }
333
334 let mut y = vec![0.0_f64; n + 1]; {
338 let mut sorted = x.to_vec();
339 sorted.sort_by(|a, b| a.total_cmp(b));
340 let shift = x[n / 2];
341 for (i, v) in sorted.into_iter().enumerate() {
342 y[i + 1] = v - shift;
343 }
344 }
345
346 let n2 = n / 2;
347 let mut a = vec![0.0_f64; n2 + 1]; if n == 3 {
350 a[1] = std::f64::consts::FRAC_1_SQRT_2;
351 } else {
352 let an = n as f64;
353 let an25 = an + 0.25;
354 let mut summ2 = 0.0;
355 for i in 1..=n2 {
356 a[i] = sw_ppnd((i as f64 - 0.375) / an25);
357 summ2 += a[i] * a[i];
358 }
359 summ2 *= 2.0;
360 let ssumm2 = summ2.sqrt();
361 let rsn = 1.0 / an.sqrt();
362 let a1 = sw_poly(&SW_C1, rsn) - a[1] / ssumm2;
363
364 let (i1, fac);
365 if n > 5 {
366 i1 = 3;
367 let a2 = -a[2] / ssumm2 + sw_poly(&SW_C2, rsn);
368 fac = ((summ2 - 2.0 * a[1] * a[1] - 2.0 * a[2] * a[2])
369 / (1.0 - 2.0 * a1 * a1 - 2.0 * a2 * a2))
370 .sqrt();
371 a[1] = a1;
372 a[2] = a2;
373 } else {
374 i1 = 2;
375 fac = ((summ2 - 2.0 * a[1] * a[1]) / (1.0 - 2.0 * a1 * a1)).sqrt();
376 a[1] = a1;
377 }
378 for i in i1..=n2 {
379 a[i] = -a[i] / fac;
380 }
381 }
382
383 let coeff = |i: usize, j: usize| -> f64 {
386 let sign = if i >= j { 1.0 } else { -1.0 };
387 sign * a[i.min(j)]
388 };
389
390 let rng = y[n] - y[1];
391 #[allow(clippy::neg_cmp_op_on_partial_ord)]
392 let nonpositive_or_nan = !(rng > 0.0);
393 if nonpositive_or_nan {
394 return Err(NormalityError::ZeroRange);
395 }
396
397 let mut sa = 0.0;
398 let mut sx = 0.0;
399 let mut j = n;
400 for i in 1..=n {
401 let asa = if i != j { coeff(i, j) } else { 0.0 };
402 sa += asa;
403 sx += y[i] / rng;
404 j -= 1;
405 }
406 sa /= n as f64;
407 sx /= n as f64;
408
409 let mut ssa = 0.0;
410 let mut ssx = 0.0;
411 let mut sax = 0.0;
412 let mut j = n;
413 for i in 1..=n {
414 let asa = if i != j { coeff(i, j) - sa } else { -sa };
415 let xsx = y[i] / rng - sx;
416 ssa += asa * asa;
417 ssx += xsx * xsx;
418 sax += asa * xsx;
419 j -= 1;
420 }
421 let ssassx = (ssa * ssx).sqrt();
422 let w1 = (ssassx - sax) * (ssassx + sax) / (ssa * ssx);
423 let w = 1.0 - w1;
424
425 let p_value = if n == 3 {
426 let pi6 = 6.0 / std::f64::consts::PI;
427 let stqr = (0.75_f64).sqrt().asin();
428 (pi6 * (w.sqrt().asin() - stqr)).clamp(0.0, 1.0)
429 } else if w1 <= 0.0 {
430 1.0
433 } else {
434 let an = n as f64;
435 let mut y_t = w1.ln();
436 let xx = an.ln();
437 let (m, s);
438 if n <= 11 {
439 let gamma = sw_poly(&SW_G, an);
440 if y_t >= gamma {
441 return Ok(ShapiroWilk {
442 w,
443 p_value: SW_SMALL,
444 });
445 }
446 y_t = -(gamma - y_t).ln();
447 m = sw_poly(&SW_C3, an);
448 s = sw_poly(&SW_C4, an).exp();
449 } else {
450 m = sw_poly(&SW_C5, xx);
451 s = sw_poly(&SW_C6, xx).exp();
452 }
453 sw_alnorm((y_t - m) / s, true)
454 };
455
456 Ok(ShapiroWilk { w, p_value })
457}
458
459#[cfg(test)]
460mod tests {
461 use super::*;
462
463 const V1: [f64; 15] = [
469 0.12, -0.34, 0.05, 0.88, -1.21, 0.42, -0.07, 0.63, -0.55, 0.19, 0.27, -0.91, 1.04, -0.16,
470 0.33,
471 ];
472 const V2: [f64; 12] = [
473 1.0, -2.0, 0.5, 3.2, -1.1, 0.0, 2.3, -0.7, 4.5, -3.1, 0.9, -1.8,
474 ];
475
476 const TOL: f64 = 1e-11;
477
478 fn close(got: f64, want: f64, tol: f64) {
479 assert!(
480 (got - want).abs() <= tol + tol * want.abs(),
481 "got {got}, want {want}, diff {}",
482 (got - want).abs()
483 );
484 }
485
486 #[test]
487 fn skew_matches_scipy() {
488 close(skewness(&V1, true).unwrap(), -3.990_837_649_877_545e-1, TOL);
490 close(skewness(&V1, false).unwrap(), -4.448_671_685_942_52e-1, TOL);
491 close(skewness(&V2, true).unwrap(), 3.471_961_494_435_007e-1, TOL);
492 close(
493 skewness(&V2, false).unwrap(),
494 3.988_980_062_229_937_6e-1,
495 TOL,
496 );
497 }
498
499 #[test]
500 fn kurtosis_matches_scipy() {
501 close(
503 kurtosis(&V1, true, true).unwrap(),
504 -3.608_466_739_341_209_5e-1,
505 TOL,
506 );
507 close(
508 kurtosis(&V1, false, true).unwrap(),
509 2.639_153_326_065_879,
510 TOL,
511 );
512 close(
513 kurtosis(&V1, true, false).unwrap(),
514 2.032_272_460_741_557_7e-2,
515 TOL,
516 );
517 close(
518 kurtosis(&V2, true, true).unwrap(),
519 -7.089_134_727_921_165e-1,
520 TOL,
521 );
522 close(
523 kurtosis(&V2, false, true).unwrap(),
524 2.291_086_527_207_883_5,
525 TOL,
526 );
527 close(
528 kurtosis(&V2, true, false).unwrap(),
529 -3.930_514_067_696_959_7e-1,
530 TOL,
531 );
532 }
533
534 #[test]
535 fn moments_bundle_matches_components() {
536 let m = moments(&V1, true, true).unwrap();
537 close(m.mean, 4.6e-2, TOL);
538 close(m.variance, 3.582_106_666_666_667_3e-1, TOL);
539 close(m.skewness, skewness(&V1, true).unwrap(), 0.0);
540 close(m.kurtosis_excess, kurtosis(&V1, true, true).unwrap(), 0.0);
541 }
542
543 #[test]
544 fn jarque_bera_matches_scipy() {
545 let jb1 = jarque_bera(&V1).unwrap();
546 close(jb1.statistic, 4.795_510_799_978_267_6e-1, TOL);
547 close(jb1.p_value, 7.868_044_473_746_433e-1, TOL);
548 let jb2 = jarque_bera(&V2).unwrap();
549 close(jb2.statistic, 4.923_694_883_298_767_6e-1, TOL);
550 close(jb2.p_value, 7.817_777_826_998_267e-1, TOL);
551 }
552
553 #[test]
554 fn shapiro_wilk_matches_scipy() {
555 let sw1 = shapiro_wilk(&V1).unwrap();
556 close(sw1.w, 9.760_100_117_114_072e-1, 1e-10);
557 close(sw1.p_value, 9.349_583_655_477_645e-1, 1e-9);
558 let sw2 = shapiro_wilk(&V2).unwrap();
559 close(sw2.w, 9.773_113_095_849_641e-1, 1e-10);
560 close(sw2.p_value, 9.706_201_224_239_078e-1, 1e-9);
561 }
562
563 #[test]
564 fn shapiro_wilk_n3_path() {
565 let x = [0.1, -0.4, 0.9];
567 let sw = shapiro_wilk(&x).unwrap();
568 assert!(sw.w > 0.0 && sw.w <= 1.0 + 1e-12);
569 assert!((0.0..=1.0).contains(&sw.p_value));
570 }
571
572 #[test]
573 fn rejects_degenerate_inputs() {
574 assert_eq!(
575 skewness(&[1.0, 1.0, 1.0], true),
576 Err(NormalityError::ZeroVariance)
577 );
578 assert_eq!(
579 shapiro_wilk(&[2.0, 2.0, 2.0]),
580 Err(NormalityError::ZeroRange)
581 );
582 assert!(matches!(
583 skewness(&[1.0, 2.0], false),
584 Err(NormalityError::InsufficientData { need: 3, got: 2 })
585 ));
586 assert!(matches!(
587 kurtosis(&[1.0, 2.0, 3.0], true, false),
588 Err(NormalityError::InsufficientData { need: 4, got: 3 })
589 ));
590 assert_eq!(
591 skewness(&[1.0, f64::NAN, 2.0], true),
592 Err(NormalityError::NonFinite)
593 );
594 assert!(matches!(
595 shapiro_wilk(&[1.0, 2.0]),
596 Err(NormalityError::InsufficientData { need: 3, got: 2 })
597 ));
598 }
599}