use rand::Rng;
use rand_distr::{Distribution, LogNormal};
use crate::core::types::{MutationType, SvType, Variant};
pub fn generate_hrd_deletions<R: Rng>(
chrom_lengths: &[(String, u64)],
count: usize,
rng: &mut R,
) -> Vec<Variant> {
let mut variants = Vec::with_capacity(count);
if chrom_lengths.is_empty() || count == 0 {
return variants;
}
let size_dist = LogNormal::new(13.1, 1.2).expect("valid log-normal params");
for _ in 0..count {
let (chrom, chrom_len) = match pick_chrom(chrom_lengths, rng) {
Some(v) => v,
None => continue,
};
let size = (size_dist.sample(rng) as u64).clamp(100_000, 10_000_000);
if chrom_len <= size + 1 {
continue; }
let start = rng.random_range(0..chrom_len - size);
let end = start + size;
variants.push(Variant {
chrom: chrom.clone(),
mutation: MutationType::Sv {
sv_type: SvType::Deletion,
chrom,
start,
end,
},
expected_vaf: 0.5,
clone_id: Some("hrd_loh".to_string()),
haplotype: None,
ccf: None,
});
}
variants
}
pub fn generate_tdp_duplications<R: Rng>(
chrom_lengths: &[(String, u64)],
count: usize,
rng: &mut R,
) -> Vec<Variant> {
let mut variants = Vec::with_capacity(count);
if chrom_lengths.is_empty() || count == 0 {
return variants;
}
let size_dist = LogNormal::new(8.0, 0.8).expect("valid log-normal params");
for _ in 0..count {
let (chrom, chrom_len) = match pick_chrom(chrom_lengths, rng) {
Some(v) => v,
None => continue,
};
let size = (size_dist.sample(rng) as u64).clamp(1_000, 10_000);
if chrom_len <= size + 1 {
continue;
}
let start = rng.random_range(0..chrom_len - size);
let end = start + size;
variants.push(Variant {
chrom: chrom.clone(),
mutation: MutationType::Sv {
sv_type: SvType::Duplication,
chrom,
start,
end,
},
expected_vaf: 0.5,
clone_id: Some("tdp".to_string()),
haplotype: None,
ccf: None,
});
}
variants
}
pub fn generate_chromothripsis<R: Rng>(
chrom_lengths: &[(String, u64)],
count: usize,
rng: &mut R,
) -> Vec<Variant> {
let mut variants = Vec::with_capacity(count);
if chrom_lengths.is_empty() || count == 0 {
return variants;
}
let (shattered_chrom, chrom_len) = match pick_chrom(chrom_lengths, rng) {
Some(v) => v,
None => return variants,
};
let window_size = rng.random_range(10_000_000u64..=50_000_000).min(chrom_len);
let window_start = if chrom_len > window_size {
rng.random_range(0..chrom_len - window_size)
} else {
0
};
let window_end = window_start + window_size;
if window_size < 2_000 {
return variants;
}
for _ in 0..count {
let start = rng.random_range(window_start..window_end.saturating_sub(1_000));
let size = rng
.random_range(100_000u64..=5_000_000)
.min(window_end - start);
let end = start + size;
let sv_type = match rng.random_range(0..3u8) {
0 => SvType::Deletion,
1 => SvType::Inversion,
_ => SvType::Duplication,
};
variants.push(Variant {
chrom: shattered_chrom.clone(),
mutation: MutationType::Sv {
sv_type,
chrom: shattered_chrom.clone(),
start,
end,
},
expected_vaf: 0.5,
clone_id: Some("chromothripsis".to_string()),
haplotype: None,
ccf: None,
});
}
variants
}
fn pick_chrom<R: Rng>(chrom_lengths: &[(String, u64)], rng: &mut R) -> Option<(String, u64)> {
let total_len: u64 = chrom_lengths.iter().map(|(_, l)| l).sum();
if total_len == 0 {
return None;
}
let pick = rng.random_range(0..total_len);
let mut cumulative = 0u64;
for (chrom, len) in chrom_lengths {
cumulative += len;
if pick < cumulative {
return Some((chrom.clone(), *len));
}
}
chrom_lengths.last().map(|(c, l)| (c.clone(), *l))
}
#[cfg(test)]
mod tests {
use super::*;
use rand::rngs::StdRng;
use rand::SeedableRng;
fn test_chroms() -> Vec<(String, u64)> {
vec![
("chr1".to_string(), 248_956_422),
("chr2".to_string(), 242_193_529),
("chr3".to_string(), 198_295_559),
]
}
#[test]
fn test_hrd_deletions_count() {
let mut rng = StdRng::seed_from_u64(42);
let chroms = test_chroms();
let variants = generate_hrd_deletions(&chroms, 10, &mut rng);
assert!(!variants.is_empty());
assert!(variants.len() <= 10);
}
#[test]
fn test_hrd_deletions_size_range() {
let mut rng = StdRng::seed_from_u64(1);
let chroms = test_chroms();
let variants = generate_hrd_deletions(&chroms, 100, &mut rng);
for v in &variants {
if let MutationType::Sv { start, end, .. } = &v.mutation {
let size = end - start;
assert!(
(100_000..=10_000_000).contains(&size),
"HRD deletion size {size} out of expected range"
);
}
}
}
#[test]
fn test_tdp_duplications_size_range() {
let mut rng = StdRng::seed_from_u64(2);
let chroms = test_chroms();
let variants = generate_tdp_duplications(&chroms, 100, &mut rng);
for v in &variants {
if let MutationType::Sv {
sv_type,
start,
end,
..
} = &v.mutation
{
assert_eq!(*sv_type, SvType::Duplication);
let size = end - start;
assert!(
(1_000..=10_000).contains(&size),
"TDP duplication size {size} out of expected range"
);
}
}
}
#[test]
fn test_chromothripsis_single_chrom() {
let mut rng = StdRng::seed_from_u64(3);
let chroms = test_chroms();
let variants = generate_chromothripsis(&chroms, 20, &mut rng);
assert!(!variants.is_empty());
let chrom_name = &variants[0].chrom;
for v in &variants {
assert_eq!(
&v.chrom, chrom_name,
"chromothripsis variants must all be on the same chromosome"
);
}
}
#[test]
fn test_empty_inputs() {
let mut rng = StdRng::seed_from_u64(0);
assert!(generate_hrd_deletions(&[], 10, &mut rng).is_empty());
assert!(generate_tdp_duplications(&[], 10, &mut rng).is_empty());
assert!(generate_chromothripsis(&[], 10, &mut rng).is_empty());
let chroms = test_chroms();
assert!(generate_hrd_deletions(&chroms, 0, &mut rng).is_empty());
assert!(generate_tdp_duplications(&chroms, 0, &mut rng).is_empty());
assert!(generate_chromothripsis(&chroms, 0, &mut rng).is_empty());
}
#[test]
fn test_clone_ids() {
let mut rng = StdRng::seed_from_u64(99);
let chroms = test_chroms();
let hrd = generate_hrd_deletions(&chroms, 5, &mut rng);
for v in &hrd {
assert_eq!(v.clone_id.as_deref(), Some("hrd_loh"));
}
let tdp = generate_tdp_duplications(&chroms, 5, &mut rng);
for v in &tdp {
assert_eq!(v.clone_id.as_deref(), Some("tdp"));
}
let ct = generate_chromothripsis(&chroms, 5, &mut rng);
for v in &ct {
assert_eq!(v.clone_id.as_deref(), Some("chromothripsis"));
}
}
}