use crate::{
math::{erfc, igamc},
result::TestResult,
rng::Rng,
};
use std::f64::consts::{LN_2, SQRT_2};
const GOFS_MIN_EXPECTED: f64 = 10.0;
fn strip_b(word: u32, r: usize, s: usize) -> u32 {
if r == 0 {
word >> (32 - s)
} else {
(word << r) >> (32 - s)
}
}
fn bit_chunks(rng: &mut impl Rng, r: usize, s: usize) -> impl Iterator<Item = (u32, usize)> + '_ {
std::iter::from_fn(move || Some((strip_b(rng.next_u32(), r, s), s)))
}
fn binomial_probs(l: usize) -> Vec<f64> {
let mut probs = vec![0.0; l + 1];
probs[0] = (-(l as f64) * LN_2).exp();
for k in 1..=l {
probs[k] = probs[k - 1] * (l + 1 - k) as f64 / k as f64;
}
probs
}
fn next_block_weight(blocks: &mut impl Iterator<Item = (u32, usize)>, l: usize) -> Option<usize> {
let mut remaining = l;
let mut weight = 0usize;
while remaining > 0 {
let (chunk, width) = blocks.next()?;
let take = remaining.min(width);
let mask = if take == 32 {
u32::MAX
} else {
(1u32 << take) - 1
};
weight += (chunk & mask).count_ones() as usize;
remaining -= take;
}
Some(weight)
}
fn chi_square_pvalue(chi_square: f64, degrees_of_freedom: usize) -> f64 {
if degrees_of_freedom == 0 {
return f64::NAN;
}
igamc(degrees_of_freedom as f64 / 2.0, chi_square / 2.0)
}
fn chi_square(expected: &[f64], observed: &[u64]) -> f64 {
expected
.iter()
.zip(observed)
.filter(|(e, _)| **e > 0.0)
.map(|(e, &o)| {
let d = o as f64 - *e;
d * d / *e
})
.sum()
}
fn lumped_chi_square(expected: &[f64], observed: &[u64], min_expected: f64) -> (f64, usize, usize) {
let mut kept_expected = Vec::new();
let mut kept_observed = Vec::new();
let mut lumped_expected = 0.0;
let mut lumped_observed = 0u64;
let mut lumped_cells = 0usize;
for (&e, &o) in expected.iter().zip(observed) {
if e >= min_expected {
kept_expected.push(e);
kept_observed.push(o);
} else {
lumped_expected += e;
lumped_observed += o;
lumped_cells += 1;
}
}
if lumped_expected >= min_expected {
kept_expected.push(lumped_expected);
kept_observed.push(lumped_observed);
} else if !kept_expected.is_empty() {
let last = kept_expected.len() - 1;
kept_expected[last] += lumped_expected;
kept_observed[last] += lumped_observed;
} else {
let split = observed.len() / 2;
let e0: f64 = expected[..split].iter().sum();
let e1: f64 = expected[split..].iter().sum();
let o0: u64 = observed[..split].iter().sum();
let o1: u64 = observed[split..].iter().sum();
kept_expected.extend([e0, e1]);
kept_observed.extend([o0, o1]);
}
let classes = kept_expected.len();
let dof = classes.saturating_sub(1);
(
chi_square(&kept_expected, &kept_observed),
dof,
lumped_cells,
)
}
#[derive(Debug, Clone)]
pub struct HammingCorrSummary {
pub n: usize,
pub r: usize,
pub s: usize,
pub l: usize,
pub rho_hat: f64,
pub z_score: f64,
pub p_value: f64,
}
pub fn hamming_corr(
rng: &mut impl Rng,
n: usize,
r: usize,
s: usize,
l: usize,
) -> HammingCorrSummary {
assert!(n >= 2, "n must be at least 2");
assert!(s > 0 && s <= 32, "s must be in 1..=32");
assert!(r + s <= 32, "r + s must be <= 32");
let mut chunks = bit_chunks(rng, r, s);
let mut prev = next_block_weight(&mut chunks, l).expect("insufficient stream");
let mut sum = 0.0f64;
let center = l as f64 / 2.0;
for _ in 1..n {
let cur = next_block_weight(&mut chunks, l).expect("insufficient stream");
sum += (prev as f64 - center) * (cur as f64 - center);
prev = cur;
}
let rho_hat = 4.0 * sum / ((n - 1) as f64 * l as f64);
let z_score = rho_hat * ((n - 1) as f64).sqrt();
let p_value = erfc(z_score.abs() / SQRT_2);
HammingCorrSummary {
n,
r,
s,
l,
rho_hat,
z_score,
p_value,
}
}
pub fn hamming_corr_result(summary: &HammingCorrSummary) -> TestResult {
TestResult::with_note(
"testu01::hamming_corr",
summary.p_value,
format!(
"n={}, r={}, s={}, L={}, rho_hat={:.6}, z={:.4}",
summary.n, summary.r, summary.s, summary.l, summary.rho_hat, summary.z_score
),
)
}
#[derive(Debug, Clone)]
pub struct HammingIndepSummary {
pub n: usize,
pub r: usize,
pub s: usize,
pub l: usize,
pub d: usize,
pub main_chi_square: f64,
pub main_dof: usize,
pub main_p_value: f64,
pub lumped_cells: usize,
pub block_chi_square: Vec<f64>,
pub block_dof: Vec<usize>,
pub block_p_value: Vec<f64>,
}
pub fn hamming_indep(
rng: &mut impl Rng,
n: usize,
r: usize,
s: usize,
l: usize,
d: usize,
) -> HammingIndepSummary {
assert!(n as f64 >= 2.0 * GOFS_MIN_EXPECTED, "n must be >= 20");
assert!(s > 0 && s <= 32, "s must be in 1..=32");
assert!(r + s <= 32, "r + s must be <= 32");
assert!((1..=8).contains(&d), "d must be in 1..=8");
assert!(d <= l.div_ceil(2), "d must be <= (L + 1) / 2");
let probs = binomial_probs(l);
let width = l + 1;
let mut counts = vec![0u64; width * width];
let mut chunks = bit_chunks(rng, r, s);
for _ in 0..n {
let x = next_block_weight(&mut chunks, l).expect("insufficient stream");
let y = next_block_weight(&mut chunks, l).expect("insufficient stream");
counts[x * width + y] += 1;
}
let mut expected = vec![0.0f64; width * width];
for i in 0..=l {
for j in 0..=l {
expected[i * width + j] = n as f64 * probs[i] * probs[j];
}
}
let (main_chi_square, main_dof, lumped_cells) =
lumped_chi_square(&expected, &counts, GOFS_MIN_EXPECTED);
let main_p_value = chi_square_pvalue(main_chi_square, main_dof);
let l2 = l / 2;
let mut l1 = l / 2;
if l % 2 == 1 {
l1 += 1;
}
let mut block_chi_square = Vec::with_capacity(d);
let mut block_dof = Vec::with_capacity(d);
let mut block_p_value = Vec::with_capacity(d);
for k in 1..=d {
let mut xd0 = 0u64;
let mut xd1 = 0u64;
for i in 0..=l1 - k {
for j in 0..=l1 - k {
xd0 += counts[i * width + j];
}
}
for i in l2 + k..=l {
for j in l2 + k..=l {
xd0 += counts[i * width + j];
}
}
for i in 0..=l1 - k {
for j in l2 + k..=l {
xd1 += counts[i * width + j];
}
}
for i in l2 + k..=l {
for j in 0..=l1 - k {
xd1 += counts[i * width + j];
}
}
let tail_mass: f64 = probs[..=l1 - k].iter().sum();
let nb_moyen = tail_mass * tail_mass * n as f64 * 2.0;
let expected_block = [nb_moyen, nb_moyen, n as f64 - 2.0 * nb_moyen];
let observed_block = [xd0, xd1, n as u64 - xd0 - xd1];
let chi = chi_square(&expected_block, &observed_block);
let dof = if (l % 2 == 1) && k == 1 { 1 } else { 2 };
block_chi_square.push(chi);
block_dof.push(dof);
block_p_value.push(chi_square_pvalue(chi, dof));
}
HammingIndepSummary {
n,
r,
s,
l,
d,
main_chi_square,
main_dof,
main_p_value,
lumped_cells,
block_chi_square,
block_dof,
block_p_value,
}
}
pub fn hamming_indep_main_result(summary: &HammingIndepSummary) -> TestResult {
TestResult::with_note(
"testu01::hamming_indep_main",
summary.main_p_value,
format!(
"n={}, r={}, s={}, L={}, dof={}, lumped_cells={}, chi2={:.4}",
summary.n,
summary.r,
summary.s,
summary.l,
summary.main_dof,
summary.lumped_cells,
summary.main_chi_square
),
)
}
pub fn hamming_indep_block_result(summary: &HammingIndepSummary, k: usize) -> TestResult {
let idx = k - 1;
TestResult::with_note(
"testu01::hamming_indep_block",
summary.block_p_value[idx],
format!(
"n={}, r={}, s={}, L={}, d={}, dof={}, chi2={:.4}",
summary.n,
summary.r,
summary.s,
summary.l,
k,
summary.block_dof[idx],
summary.block_chi_square[idx]
),
)
}
#[cfg(test)]
mod tests {
use super::{binomial_probs, hamming_corr};
use crate::rng::ConstantRng;
#[test]
fn binomial_probs_sum_to_one() {
let probs = binomial_probs(12);
let sum: f64 = probs.iter().sum();
assert!((sum - 1.0).abs() < 1e-12);
}
#[test]
fn hamming_corr_rejects_constant_stream() {
let mut rng = ConstantRng::new(0);
let summary = hamming_corr(&mut rng, 128, 0, 8, 16);
assert!(summary.p_value < 1e-6);
}
}