#[cfg(not(debug_assertions))]
#[cfg(test)]
extern crate num_bigint;
#[cfg(not(debug_assertions))]
#[cfg(test)]
extern crate num_rational;
#[cfg(not(debug_assertions))]
#[cfg(test)]
extern crate rand;
#[cfg(not(debug_assertions))]
#[cfg(test)]
extern crate rayon;
#[cfg(not(debug_assertions))]
#[cfg(test)]
extern crate vector_utils;
use num_traits::{Num, One, Zero};
use std::ops::MulAssign;
pub fn stirling2_table<T: Num + Clone + From<u32>>(n_max: usize) -> Vec<Vec<T>> {
let mut s = Vec::<Vec<T>>::new();
let zero: T = Zero::zero();
let one: T = One::one();
for n in 0..=n_max {
s.push(vec![zero.clone(); n + 1]);
}
s[0][0] = one.clone();
for n in 1..=n_max {
s[n][0] = zero.clone();
for k in 1..n {
s[n][k] = T::from(k as u32) * s[n - 1][k].clone() + s[n - 1][k - 1].clone();
}
s[n][n] = one.clone();
}
s
}
pub fn stirling2_ratio_table<T: Num + Clone + MulAssign + From<u32>>(n_max: usize) -> Vec<Vec<T>> {
let mut s = Vec::<Vec<T>>::new();
let zero: T = Zero::zero();
let one: T = One::one();
for n in 0..=n_max {
s.push(vec![zero.clone(); n + 1]);
}
s[0][0] = one.clone();
let mut z = Vec::<T>::new();
for n in 1..=n_max {
s[n][0] = zero.clone();
for k in 1..n - 1 {
z[k - 1] *= T::from((k - 1) as u32) / T::from(k as u32);
}
if n >= 2 {
let mut u = one.clone();
for _ in 0..n - 1 {
u *= T::from((n - 2) as u32) / T::from((n - 1) as u32);
}
z.push(u);
}
for k in 1..n {
let x = z[k - 1].clone(); s[n][k] = s[n - 1][k].clone() + s[n - 1][k - 1].clone() * x;
}
s[n][n] = one.clone(); for j in 1..=n {
s[n][n] *= T::from(j as u32) / T::from(n as u32);
}
}
s
}
#[allow(clippy::many_single_char_names)]
pub fn p_at_most_m_distinct_in_sample_of_x_from_n(
m: usize,
x: usize,
n: usize,
sr: &[Vec<f64>],
) -> f64 {
let mut p = 1.0;
for u in m + 1..=x {
let mut z = sr[x][u];
for _ in 0..x {
z *= u as f64 / n as f64;
}
for v in 1..=u {
z *= (n - v + 1) as f64 / (u - v + 1) as f64;
}
p -= z;
}
if p < 0.0 {
p = 0.0;
}
p
}
#[cfg(test)]
mod tests {
#[cfg(debug_assertions)]
#[test]
fn test_vdj_stirling_stuff_fail() {
println!(
"\n\"cargo test\" deliberately fails here because without running in release mode,"
);
println!("the test would be too slow.\n");
panic!("aborting");
}
#[cfg(not(debug_assertions))]
#[test]
fn test_stirling_stuff() {
use num_bigint::{BigInt, BigUint, ToBigUint};
use num_rational::Ratio;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use vector_utils::*;
use super::*;
fn simulate_p_at_most_m_distinct_in_sample_of_x_from_n(
m: usize,
x: usize,
n: usize,
count: usize,
) -> f64 {
let group = 1000;
assert!(count % group == 0);
let mut rng: StdRng = SeedableRng::from_seed([0_u8; 32]);
let mut seeds = Vec::<[u8; 32]>::new();
for _ in 0..group {
let mut x = [0_u8; 32];
for j in 0..x.len() {
x[j] = rng.gen_range(0..255);
}
seeds.push(x);
}
let goods: Vec<_> = seeds
.par_iter()
.map(|seed| {
let mut rng: StdRng = SeedableRng::from_seed(*seed);
let mut goods = 0;
for _ in 0..count / group {
let mut sample = Vec::<usize>::new();
for _ in 0..x {
sample.push(rng.gen_range(0..n));
}
unique_sort(&mut sample);
if sample.len() <= m {
goods += 1;
}
}
goods
})
.collect();
goods.iter().sum::<usize>() as f64 / count as f64
}
fn assert_equal_to_d_digits(x1: &BigUint, x2: &BigUint, d: usize) {
let mut n = 1_usize;
for _ in 0..d {
n *= 10;
}
let y1 = x1.clone() * n.to_biguint().unwrap();
let y1x2 = y1 / x2.clone();
if y1x2 != n.to_biguint().unwrap()
&& y1x2 != (n - 1).to_biguint().unwrap()
&& y1x2 != (n + 1).to_biguint().unwrap()
{
eprintln!("x1 != x2 to {} digits, y1x2 = {}", y1x2, d);
assert!(0 == 1);
}
}
let n_max = 3000;
let s2 = stirling2_table::<f64>(n_max);
assert_eq!(s2[10][5], 42525.0);
let nb = 722;
let sbig = stirling2_table::<BigUint>(nb);
let n = 219;
for k in 1..=n {
let r = Ratio::<BigInt>::from_float(s2[n][k]).unwrap();
let (rnum, rden) = (
r.numer().to_biguint().unwrap(),
r.denom().to_biguint().unwrap(),
);
let x1 = sbig[n][k].clone() * rden;
let x2 = rnum;
assert_equal_to_d_digits(&x1, &x2, 14);
}
let n_max = 2500;
let n = nb;
let sr = stirling2_ratio_table::<f64>(n_max);
for k in 1..=n {
let mut kf = 1.to_biguint().unwrap(); for j in 1..=k {
kf *= j.to_biguint().unwrap();
}
let mut kn = 1.to_biguint().unwrap(); for _ in 1..=n {
kn *= k.to_biguint().unwrap();
}
let r = Ratio::<BigInt>::from_float(sr[n][k]).unwrap();
let (rnum, rden) = (
r.numer().to_biguint().unwrap(),
r.denom().to_biguint().unwrap(),
);
let x1 = sbig[n][k].clone() * kf * rden;
let x2 = kn * rnum;
assert_equal_to_d_digits(&x1, &x2, 12);
}
let m = 27;
let x = 30;
let n = 2500;
let p1 = p_at_most_m_distinct_in_sample_of_x_from_n(m, x, n, &sr);
let p2 = simulate_p_at_most_m_distinct_in_sample_of_x_from_n(m, x, n, 100_000_000);
assert_eq!(format!("{:.7}", p1), "0.0005953");
assert_eq!(format!("{:.7}", p2), "0.0005943");
}
}