use rand::Rng;
use rand_distr::{Binomial, Distribution};
#[cfg(test)]
#[must_use]
pub fn expected_vaf(
ccf: f64,
multiplicity: u32,
purity: f64,
cn_tumour: u32,
cn_normal: u32,
) -> f64 {
let numerator = ccf * (multiplicity as f64) * purity;
let denominator = purity * (cn_tumour as f64) + (1.0 - purity) * (cn_normal as f64);
if denominator == 0.0 {
return 0.0;
}
numerator / denominator
}
pub fn sample_alt_count<R: Rng>(total_depth: u32, expected_vaf: f64, rng: &mut R) -> u32 {
if expected_vaf <= 0.0 {
return 0;
}
if expected_vaf >= 1.0 {
return total_depth;
}
let dist = Binomial::new(total_depth as u64, expected_vaf).expect("invalid binomial params");
dist.sample(rng) as u32
}
#[cfg(test)]
mod tests {
use super::*;
use rand::rngs::StdRng;
use rand::SeedableRng;
#[test]
fn test_expected_vaf_simple() {
let vaf = expected_vaf(1.0, 1, 1.0, 2, 2);
assert!((vaf - 0.5).abs() < 1e-10);
}
#[test]
fn test_expected_vaf_with_purity() {
let vaf = expected_vaf(1.0, 1, 0.5, 2, 2);
assert!((vaf - 0.25).abs() < 1e-10);
}
#[test]
fn test_expected_vaf_subclone() {
let vaf = expected_vaf(0.3, 1, 1.0, 2, 2);
assert!((vaf - 0.15).abs() < 1e-10);
}
#[test]
fn test_expected_vaf_cnv_gain() {
let vaf = expected_vaf(1.0, 1, 1.0, 4, 2);
assert!((vaf - 0.25).abs() < 1e-10);
}
#[test]
fn test_expected_vaf_amplified_allele() {
let vaf = expected_vaf(1.0, 3, 1.0, 4, 2);
assert!((vaf - 0.75).abs() < 1e-10);
}
#[test]
fn test_expected_vaf_low_tf() {
let vaf = expected_vaf(1.0, 1, 0.005, 2, 2);
assert!((vaf - 0.0025).abs() < 1e-10);
}
#[test]
fn test_binomial_sampling_mean() {
let mut rng = StdRng::seed_from_u64(42);
let n = 100_000;
let depth = 100u32;
let vaf = 0.1;
let total: u32 = (0..n).map(|_| sample_alt_count(depth, vaf, &mut rng)).sum();
let mean = total as f64 / n as f64;
assert!(
(mean - 10.0).abs() < 0.5,
"binomial mean {} should be close to 10",
mean
);
}
#[test]
fn test_binomial_sampling_variance() {
let mut rng = StdRng::seed_from_u64(42);
let n = 100_000;
let depth = 30u32;
let vaf = 0.1;
let counts: Vec<u32> = (0..n)
.map(|_| sample_alt_count(depth, vaf, &mut rng))
.collect();
let mean = counts.iter().sum::<u32>() as f64 / n as f64;
let variance = counts
.iter()
.map(|&c| (c as f64 - mean).powi(2))
.sum::<f64>()
/ n as f64;
assert!(
(variance - 2.7).abs() < 0.5,
"variance {} should be close to 2.7",
variance
);
}
#[test]
fn test_binomial_not_deterministic() {
let mut rng = StdRng::seed_from_u64(42);
let counts: Vec<u32> = (0..100)
.map(|_| sample_alt_count(30, 0.1, &mut rng))
.collect();
let unique: std::collections::HashSet<_> = counts.iter().collect();
assert!(
unique.len() > 1,
"binomial sampling should produce variable counts"
);
}
#[test]
fn test_ultra_low_vaf_unbiased_2000x() {
let mut rng = StdRng::seed_from_u64(42);
let n = 100_000u64;
let total: u64 = (0..n)
.map(|_| sample_alt_count(2000, 0.0001, &mut rng) as u64)
.sum();
let mean = total as f64 / n as f64;
assert!(
(0.15..=0.25).contains(&mean),
"mean {} should be 0.2 ± 0.05 for ultra-low VAF at 2000x",
mean
);
}
#[test]
fn test_ultra_low_vaf_unbiased_5000x() {
let mut rng = StdRng::seed_from_u64(43);
let n = 100_000u64;
let total: u64 = (0..n)
.map(|_| sample_alt_count(5000, 0.00001, &mut rng) as u64)
.sum();
let mean = total as f64 / n as f64;
assert!(
(0.03..=0.07).contains(&mean),
"mean {} should be 0.05 ± 0.02 for ultra-low VAF at 5000x",
mean
);
}
#[test]
fn test_vaf_1_all_alt() {
let mut rng = StdRng::seed_from_u64(44);
for _ in 0..100 {
let count = sample_alt_count(100, 1.0, &mut rng);
assert_eq!(count, 100, "VAF=1.0 must always yield all alt reads");
}
}
}