use crate::core::types::Read;
use crate::seq_utils::complement;
#[cfg(test)]
use crate::variants::vaf::sample_alt_count;
#[cfg(test)]
use rand::Rng;
#[derive(Debug, Clone, PartialEq)]
pub enum StructuralVariant {
Deletion {
chrom: String,
start: u64,
end: u64,
},
Insertion {
chrom: String,
pos: u64,
sequence: Vec<u8>,
},
Inversion {
chrom: String,
start: u64,
end: u64,
},
Duplication {
chrom: String,
start: u64,
end: u64,
copies: u32,
},
Translocation {
chrom1: String,
pos1: u64,
chrom2: String,
pos2: u64,
},
}
impl StructuralVariant {
pub fn chrom(&self) -> &str {
match self {
Self::Deletion { chrom, .. } => chrom,
Self::Insertion { chrom, .. } => chrom,
Self::Inversion { chrom, .. } => chrom,
Self::Duplication { chrom, .. } => chrom,
Self::Translocation { chrom1, .. } => chrom1,
}
}
pub fn start(&self) -> u64 {
match self {
Self::Deletion { start, .. } => *start,
Self::Insertion { pos, .. } => *pos,
Self::Inversion { start, .. } => *start,
Self::Duplication { start, .. } => *start,
Self::Translocation { pos1, .. } => *pos1,
}
}
pub fn end(&self) -> u64 {
match self {
Self::Deletion { end, .. } => *end,
Self::Insertion { pos, .. } => pos + 1,
Self::Inversion { end, .. } => *end,
Self::Duplication { end, .. } => *end,
Self::Translocation { pos1, .. } => pos1 + 1,
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub enum SvReadEffect {
Unaffected,
SplitLeft,
SplitRight,
Deleted,
Modified,
Chimeric,
}
pub fn apply_deletion(read: &mut Read, read_start: u64, sv: &StructuralVariant) -> SvReadEffect {
let (chrom_sv, del_start, del_end) = match sv {
StructuralVariant::Deletion { chrom, start, end } => (chrom.as_str(), *start, *end),
_ => return SvReadEffect::Unaffected,
};
let _ = chrom_sv;
let read_end = read_start + read.len() as u64;
let read_len = read.len();
if read_start >= del_start && read_end <= del_end {
return SvReadEffect::Deleted;
}
if read_start < del_start && read_end > del_start && read_end <= del_end {
let keep = (del_start - read_start) as usize;
soft_clip_right(read, keep, read_len);
return SvReadEffect::SplitLeft;
}
if read_start >= del_start && read_start < del_end && read_end > del_end {
let clip_len = (del_end - read_start) as usize;
soft_clip_left(read, clip_len, read_len);
return SvReadEffect::SplitRight;
}
if read_start < del_start && read_end > del_end {
let keep = (del_start - read_start) as usize;
soft_clip_right(read, keep, read_len);
return SvReadEffect::SplitLeft;
}
SvReadEffect::Unaffected
}
pub fn apply_insertion(read: &mut Read, read_start: u64, sv: &StructuralVariant) -> SvReadEffect {
let (ins_pos, ins_seq) = match sv {
StructuralVariant::Insertion { pos, sequence, .. } => (*pos, sequence.as_slice()),
_ => return SvReadEffect::Unaffected,
};
let read_end = read_start + read.len() as u64;
if ins_pos < read_start || ins_pos >= read_end {
return SvReadEffect::Unaffected;
}
let offset = (ins_pos - read_start) as usize;
let original_len = read.len();
let mut new_seq: Vec<u8> = Vec::with_capacity(original_len);
new_seq.extend_from_slice(&read.seq[..offset]);
new_seq.extend_from_slice(ins_seq);
new_seq.extend_from_slice(&read.seq[offset..]);
let mut new_qual: Vec<u8> = Vec::with_capacity(original_len);
new_qual.extend_from_slice(&read.qual[..offset]);
let avg_qual = if offset > 0 && offset < read.qual.len() {
((read.qual[offset - 1] as u16 + read.qual[offset] as u16) / 2) as u8
} else {
read.qual[offset.min(read.qual.len() - 1)]
};
for _ in 0..ins_seq.len() {
new_qual.push(avg_qual);
}
new_qual.extend_from_slice(&read.qual[offset..]);
let clip_start = original_len;
new_seq.truncate(original_len);
new_qual.truncate(original_len);
let clipped = ins_seq.len().min(original_len.saturating_sub(clip_start));
let _ = clipped;
read.seq = new_seq;
read.qual = new_qual;
SvReadEffect::Modified
}
pub fn apply_inversion(read: &mut Read, read_start: u64, sv: &StructuralVariant) -> SvReadEffect {
let (inv_start, inv_end) = match sv {
StructuralVariant::Inversion { start, end, .. } => (*start, *end),
_ => return SvReadEffect::Unaffected,
};
let read_end = read_start + read.len() as u64;
if read_end <= inv_start || read_start >= inv_end {
return SvReadEffect::Unaffected;
}
let overlap_start = inv_start.max(read_start);
let overlap_end = inv_end.min(read_end);
let local_start = (overlap_start - read_start) as usize;
let local_end = (overlap_end - read_start) as usize;
revcomp_slice(&mut read.seq[local_start..local_end]);
read.qual[local_start..local_end].reverse();
SvReadEffect::Modified
}
pub fn apply_duplication(read: &mut Read, read_start: u64, sv: &StructuralVariant) -> SvReadEffect {
let (dup_start, dup_end, _copies) = match sv {
StructuralVariant::Duplication {
start, end, copies, ..
} => (*start, *end, *copies),
_ => return SvReadEffect::Unaffected,
};
let read_end = read_start + read.len() as u64;
if read_start >= dup_start && read_start < dup_end && read_end > dup_end {
let keep = (dup_end - read_start) as usize;
soft_clip_right(read, keep, read.len());
return SvReadEffect::SplitRight;
}
if read_start < dup_start && read_end > dup_start && read_end <= dup_end {
return SvReadEffect::Modified; }
SvReadEffect::Unaffected
}
pub fn apply_translocation(
read: &mut Read,
read_start: u64,
chrom: &str,
sv: &StructuralVariant,
partner_seq: &[u8],
) -> SvReadEffect {
let (chrom1, pos1, _chrom2, _pos2) = match sv {
StructuralVariant::Translocation {
chrom1,
pos1,
chrom2,
pos2,
} => (chrom1.as_str(), *pos1, chrom2.as_str(), *pos2),
_ => return SvReadEffect::Unaffected,
};
if chrom != chrom1 {
return SvReadEffect::Unaffected;
}
let read_end = read_start + read.len() as u64;
if pos1 < read_start || pos1 >= read_end {
return SvReadEffect::Unaffected;
}
let split_offset = (pos1 - read_start) as usize;
let bases_from_partner = read.len() - split_offset;
let available = partner_seq.len().min(bases_from_partner);
read.seq[split_offset..split_offset + available].copy_from_slice(&partner_seq[..available]);
for b in &mut read.seq[split_offset + available..] {
*b = b'N';
}
for q in &mut read.qual[split_offset..] {
*q = 10;
}
SvReadEffect::Chimeric
}
#[cfg(test)]
pub fn sample_sv_alt_count<R: Rng>(total_depth: u32, expected_vaf: f64, rng: &mut R) -> u32 {
sample_alt_count(total_depth, expected_vaf, rng)
}
pub fn sv_vcf_info(sv: &StructuralVariant) -> String {
match sv {
StructuralVariant::Deletion { start, end, .. } => {
let svlen = end.saturating_sub(*start);
format!("SVTYPE=DEL;END={end};SVLEN=-{svlen}")
}
StructuralVariant::Insertion { pos, sequence, .. } => {
let svlen = sequence.len();
format!("SVTYPE=INS;END={pos};SVLEN={svlen}")
}
StructuralVariant::Inversion { start, end, .. } => {
let svlen = end.saturating_sub(*start);
format!("SVTYPE=INV;END={end};SVLEN={svlen}")
}
StructuralVariant::Duplication {
start, end, copies, ..
} => {
let svlen = end.saturating_sub(*start);
format!("SVTYPE=DUP;END={end};SVLEN={svlen};CN={copies}")
}
StructuralVariant::Translocation { chrom2, pos2, .. } => {
format!("SVTYPE=BND;MATECHROM={chrom2};MATEPOS={pos2}")
}
}
}
pub fn sv_vcf_alt(sv: &StructuralVariant) -> String {
match sv {
StructuralVariant::Deletion { .. } => "<DEL>".to_string(),
StructuralVariant::Insertion { .. } => "<INS>".to_string(),
StructuralVariant::Inversion { .. } => "<INV>".to_string(),
StructuralVariant::Duplication { .. } => "<DUP>".to_string(),
StructuralVariant::Translocation { chrom2, pos2, .. } => {
format!("N]{chrom2}:{pos2}]")
}
}
}
fn soft_clip_right(read: &mut Read, keep_bases: usize, _original_len: usize) {
let len = read.len();
let keep = keep_bases.min(len);
for i in keep..len {
read.seq[i] = b'N';
read.qual[i] = 0;
}
}
fn soft_clip_left(read: &mut Read, clip_len: usize, _original_len: usize) {
let len = read.len();
let clip = clip_len.min(len);
for i in 0..clip {
read.seq[i] = b'N';
read.qual[i] = 0;
}
}
fn revcomp_slice(seq: &mut [u8]) {
seq.reverse();
for b in seq.iter_mut() {
*b = complement(*b);
}
}
#[cfg(test)]
mod tests {
use super::*;
use rand::rngs::StdRng;
use rand::SeedableRng;
fn make_read(seq: &[u8]) -> Read {
Read::new(seq.to_vec(), vec![30; seq.len()])
}
#[test]
fn test_deletion_split_reads() {
let sv = StructuralVariant::Deletion {
chrom: "chr1".into(),
start: 110,
end: 120,
};
let mut read = make_read(b"ACGTACGTACGTACGTACGT"); let effect = apply_deletion(&mut read, 105, &sv);
assert_eq!(
effect,
SvReadEffect::SplitLeft,
"read spanning left breakpoint should be split-left"
);
let keep = (110 - 105) as usize; for i in 0..keep {
assert_ne!(read.seq[i], b'N', "kept bases should not be N");
}
for i in keep..read.len() {
assert_eq!(read.seq[i], b'N', "clipped bases should be N");
assert_eq!(read.qual[i], 0, "clipped bases should have quality 0");
}
}
#[test]
fn test_deletion_coverage_drop() {
let sv = StructuralVariant::Deletion {
chrom: "chr1".into(),
start: 200,
end: 300,
};
let mut read = make_read(b"ACGTACGTACGT"); let effect = apply_deletion(&mut read, 210, &sv);
assert_eq!(
effect,
SvReadEffect::Deleted,
"read entirely within deleted region should be marked Deleted"
);
let mut read2 = make_read(b"ACGTACGT");
let effect2 = apply_deletion(&mut read2, 350, &sv);
assert_eq!(effect2, SvReadEffect::Unaffected);
}
#[test]
fn test_insertion_reads() {
let ins_seq = b"TTTT".to_vec();
let sv = StructuralVariant::Insertion {
chrom: "chr1".into(),
pos: 104,
sequence: ins_seq.clone(),
};
let mut read = make_read(b"ACGTACGTACGT"); let original_len = read.len();
let effect = apply_insertion(&mut read, 100, &sv);
assert_eq!(effect, SvReadEffect::Modified);
assert_eq!(
read.len(),
original_len,
"read length must be preserved after insertion"
);
assert_eq!(&read.seq[..4], b"ACGT");
assert_eq!(&read.seq[4..8], b"TTTT");
}
#[test]
fn test_inversion_orientation() {
let sv = StructuralVariant::Inversion {
chrom: "chr1".into(),
start: 100,
end: 108,
};
let seq = b"ACGTACGT";
let mut read = make_read(seq);
let effect = apply_inversion(&mut read, 100, &sv);
assert_eq!(effect, SvReadEffect::Modified);
let seq2 = b"AAAACCCC";
let mut read2 = make_read(seq2);
let effect2 = apply_inversion(&mut read2, 100, &sv);
assert_eq!(effect2, SvReadEffect::Modified);
assert_eq!(
&read2.seq, b"GGGGTTTT",
"inverted read should be reverse-complemented"
);
}
#[test]
fn test_duplication_coverage() {
let sv = StructuralVariant::Duplication {
chrom: "chr1".into(),
start: 200,
end: 250,
copies: 2,
};
let mut split_read = make_read(&[b'A'; 20]);
let effect = apply_duplication(&mut split_read, 240, &sv);
assert_eq!(
effect,
SvReadEffect::SplitRight,
"read spanning right dup boundary should be split"
);
let keep = (250 - 240) as usize; for i in 0..keep {
assert_ne!(split_read.seq[i], b'N', "kept bases must not be N");
}
for i in keep..split_read.len() {
assert_eq!(split_read.seq[i], b'N', "clipped bases should be N");
}
let mut outside_read = make_read(&[b'A'; 10]);
let effect2 = apply_duplication(&mut outside_read, 300, &sv);
assert_eq!(effect2, SvReadEffect::Unaffected);
}
#[test]
fn test_translocation_chimeric() {
let sv = StructuralVariant::Translocation {
chrom1: "chr1".into(),
pos1: 1000,
chrom2: "chr2".into(),
pos2: 5000,
};
let mut read = make_read(b"ACGTACGTACGTACGTACGT"); let partner = b"GGGGGGGGGGGGGGGGGGGG"; let effect = apply_translocation(&mut read, 990, "chr1", &sv, partner);
assert_eq!(effect, SvReadEffect::Chimeric);
assert_eq!(&read.seq[..10], b"ACGTACGTAC");
assert_eq!(&read.seq[10..20], &partner[..10]);
for q in &read.qual[10..] {
assert_eq!(*q, 10, "chimeric bases should have quality 10");
}
}
#[test]
fn test_sv_vaf_stochastic() {
let mut rng = StdRng::seed_from_u64(12345);
let depth = 100u32;
let vaf = 0.5;
let n_trials = 10_000u32;
let total_alt: u32 = (0..n_trials)
.map(|_| sample_sv_alt_count(depth, vaf, &mut rng))
.sum();
let mean_alt = total_alt as f64 / n_trials as f64;
assert!(
(mean_alt - 50.0).abs() < 1.0,
"mean alt count {mean_alt:.2} should be close to 50.0 at VAF=0.5"
);
let counts: Vec<u32> = (0..100)
.map(|_| sample_sv_alt_count(depth, vaf, &mut rng))
.collect();
let unique: std::collections::HashSet<_> = counts.iter().copied().collect();
assert!(unique.len() > 1, "SV VAF sampling should be stochastic");
}
#[test]
fn test_sv_truth_vcf() {
let del = StructuralVariant::Deletion {
chrom: "chr1".into(),
start: 1000,
end: 2000,
};
let info = sv_vcf_info(&del);
assert!(
info.contains("SVTYPE=DEL"),
"DEL INFO must contain SVTYPE=DEL"
);
assert!(info.contains("END=2000"), "DEL INFO must contain END");
assert!(info.contains("SVLEN=-1000"), "DEL INFO must contain SVLEN");
assert_eq!(sv_vcf_alt(&del), "<DEL>");
let ins = StructuralVariant::Insertion {
chrom: "chr1".into(),
pos: 500,
sequence: b"ACGT".to_vec(),
};
let info_ins = sv_vcf_info(&ins);
assert!(info_ins.contains("SVTYPE=INS"));
assert!(info_ins.contains("SVLEN=4"));
assert_eq!(sv_vcf_alt(&ins), "<INS>");
let tra = StructuralVariant::Translocation {
chrom1: "chr1".into(),
pos1: 1000,
chrom2: "chr2".into(),
pos2: 5000,
};
let info_bnd = sv_vcf_info(&tra);
assert!(info_bnd.contains("SVTYPE=BND"));
assert!(info_bnd.contains("MATECHROM=chr2"));
assert!(info_bnd.contains("MATEPOS=5000"));
let alt_bnd = sv_vcf_alt(&tra);
assert!(
alt_bnd.contains("chr2") && alt_bnd.contains("5000"),
"BND ALT {alt_bnd} must contain mate chrom and position"
);
assert!(
alt_bnd.starts_with('N'),
"BND ALT must start with N anchor base"
);
}
#[test]
fn test_insertion_quality_avg_no_overflow() {
let mut read = Read::new(
b"ACGTACGT".to_vec(),
vec![200u8, 200, 200, 200, 200, 200, 200, 200],
);
let sv = StructuralVariant::Insertion {
chrom: "chr1".into(),
pos: 102,
sequence: b"TT".to_vec(),
};
let mut rng = StdRng::seed_from_u64(0);
apply_insertion(&mut read, 100, &sv);
let _ = &mut rng; assert!(
read.qual.iter().all(|&q| q == 200),
"all qualities must remain 200 after insertion padding; got {:?}",
read.qual
);
}
}