use rustybam::bed::Region;
use rustybam::liftover::{break_paf_on_indels, trim_paf_rec_to_rgn_fast};
use rustybam::paf::PafRecord;
/// Fixture: a 12-long record with strand field `+` and CIGAR `3=2I3=2D4=`.
///
/// Panics if the line cannot be parsed.
fn fwd_paf() -> PafRecord {
    let line = "Q 12 0 12 + T 12 0 12 10 12 60 cg:Z:3=2I3=2D4=";
    PafRecord::new(line).unwrap()
}
/// Builds a `Region` named `T` spanning `st`..`en` with an empty `id`;
/// every other field takes its `Default` value.
fn rgn(st: u64, en: u64) -> Region {
    Region {
        name: String::from("T"),
        st,
        en,
        id: String::new(),
        ..Default::default()
    }
}
/// Region (0, 3) on the forward fixture: expects CIGAR `3=`, target (0, 3), query (0, 3).
#[test]
fn fwd_trim_block1_exact() {
    let record = fwd_paf();
    let window = rgn(0, 3);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (0, 3));
    assert_eq!((trimmed.q_st, trimmed.q_en), (0, 3));
}
/// Region (3, 6) on the forward fixture: expects CIGAR `3=`, target (3, 6), query (5, 8).
#[test]
fn fwd_trim_block2_exact() {
    let record = fwd_paf();
    let window = rgn(3, 6);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (3, 6));
    assert_eq!((trimmed.q_st, trimmed.q_en), (5, 8));
}
/// Region (8, 12) on the forward fixture: expects CIGAR `4=`, target (8, 12), query (8, 12).
#[test]
fn fwd_trim_block3_exact() {
    let record = fwd_paf();
    let window = rgn(8, 12);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "4=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (8, 12));
    assert_eq!((trimmed.q_st, trimmed.q_en), (8, 12));
}
/// Region (0, 6) covers the `2I`: expects CIGAR `3=2I3=`, target (0, 6), query (0, 8).
#[test]
fn fwd_trim_span_insertion() {
    let record = fwd_paf();
    let window = rgn(0, 6);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=2I3=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (0, 6));
    assert_eq!((trimmed.q_st, trimmed.q_en), (0, 8));
}
/// Region (3, 10) covers the `2D`: expects CIGAR `3=2D2=`, target (3, 10), query (5, 10).
#[test]
fn fwd_trim_span_deletion() {
    let record = fwd_paf();
    let window = rgn(3, 10);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=2D2=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (3, 10));
    assert_eq!((trimmed.q_st, trimmed.q_en), (5, 10));
}
/// Region (0, 12) on the forward fixture: expects the full CIGAR and (0, 12) on both axes.
#[test]
fn fwd_trim_span_all_blocks() {
    let record = fwd_paf();
    let window = rgn(0, 12);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=2I3=2D4=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (0, 12));
    assert_eq!((trimmed.q_st, trimmed.q_en), (0, 12));
}
/// Region (1, 2) on the forward fixture: expects CIGAR `1=`, target (1, 2), query (1, 2).
#[test]
fn fwd_trim_cut_into_block1() {
    let record = fwd_paf();
    let window = rgn(1, 2);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "1=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (1, 2));
    assert_eq!((trimmed.q_st, trimmed.q_en), (1, 2));
}
/// Region (1, 5) on the forward fixture: expects CIGAR `2=2I2=`, target (1, 5), query (1, 7).
#[test]
fn fwd_trim_cut_block1_into_block2() {
    let record = fwd_paf();
    let window = rgn(1, 5);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "2=2I2=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (1, 5));
    assert_eq!((trimmed.q_st, trimmed.q_en), (1, 7));
}
/// Region (4, 9) on the forward fixture: expects CIGAR `2=2D1=`, target (4, 9), query (6, 9).
#[test]
fn fwd_trim_middle_of_alignment() {
    let record = fwd_paf();
    let window = rgn(4, 9);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "2=2D1=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (4, 9));
    assert_eq!((trimmed.q_st, trimmed.q_en), (6, 9));
}
/// Region (1, 10) on the forward fixture: expects CIGAR `2=2I3=2D2=`, target (1, 10), query (1, 10).
#[test]
fn fwd_trim_both_sides() {
    let record = fwd_paf();
    let window = rgn(1, 10);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "2=2I3=2D2=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (1, 10));
    assert_eq!((trimmed.q_st, trimmed.q_en), (1, 10));
}
/// Region (6, 8) on the forward fixture is expected to yield `None`.
#[test]
fn fwd_trim_deletion_only_returns_none() {
    let record = fwd_paf();
    let outcome = trim_paf_rec_to_rgn_fast(&rgn(6, 8), &record);
    assert!(outcome.is_none());
}
/// Region (0, 8) on the forward fixture: the expected result stops at target 6
/// (CIGAR `3=2I3=`, target (0, 6), query (0, 8)).
#[test]
fn fwd_trim_ending_at_deletion() {
    let record = fwd_paf();
    let window = rgn(0, 8);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=2I3=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (0, 6));
    assert_eq!((trimmed.q_st, trimmed.q_en), (0, 8));
}
/// Region (6, 12) on the forward fixture: the expected result starts at target 8
/// (CIGAR `4=`, target (8, 12), query (8, 12)).
#[test]
fn fwd_trim_starting_at_deletion() {
    let record = fwd_paf();
    let window = rgn(6, 12);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "4=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (8, 12));
    assert_eq!((trimmed.q_st, trimmed.q_en), (8, 12));
}
/// Region (0, 20) extends past the record: expects the full CIGAR and (0, 12) on both axes.
#[test]
fn fwd_trim_superset_region() {
    let record = fwd_paf();
    let window = rgn(0, 20);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=2I3=2D4=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (0, 12));
    assert_eq!((trimmed.q_st, trimmed.q_en), (0, 12));
}
/// Region (15, 20) lies beyond the record's target end of 12 and is expected to yield `None`.
#[test]
fn fwd_trim_no_overlap_returns_none() {
    let record = fwd_paf();
    let outcome = trim_paf_rec_to_rgn_fast(&rgn(15, 20), &record);
    assert!(outcome.is_none());
}
/// Fixture: the same line as the forward fixture but with strand field `-`.
///
/// Panics if the line cannot be parsed.
fn rev_paf() -> PafRecord {
    let line = "Q 12 0 12 - T 12 0 12 10 12 60 cg:Z:3=2I3=2D4=";
    PafRecord::new(line).unwrap()
}
/// Region (0, 3) on the reverse fixture: expects CIGAR `3=`, target (0, 3), query (9, 12).
#[test]
fn rev_trim_block1_exact() {
    let record = rev_paf();
    let window = rgn(0, 3);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (0, 3));
    assert_eq!((trimmed.q_st, trimmed.q_en), (9, 12));
}
/// Region (3, 6) on the reverse fixture: expects CIGAR `3=`, target (3, 6), query (4, 7).
#[test]
fn rev_trim_block2_exact() {
    let record = rev_paf();
    let window = rgn(3, 6);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (3, 6));
    assert_eq!((trimmed.q_st, trimmed.q_en), (4, 7));
}
/// Region (8, 12) on the reverse fixture: expects CIGAR `4=`, target (8, 12), query (0, 4).
#[test]
fn rev_trim_block3_exact() {
    let record = rev_paf();
    let window = rgn(8, 12);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "4=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (8, 12));
    assert_eq!((trimmed.q_st, trimmed.q_en), (0, 4));
}
/// Region (0, 6) on the reverse fixture: expects CIGAR `3=2I3=`, target (0, 6), query (4, 12).
#[test]
fn rev_trim_span_insertion() {
    let record = rev_paf();
    let window = rgn(0, 6);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=2I3=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (0, 6));
    assert_eq!((trimmed.q_st, trimmed.q_en), (4, 12));
}
/// Region (3, 10) on the reverse fixture: expects CIGAR `3=2D2=`, target (3, 10), query (2, 7).
#[test]
fn rev_trim_span_deletion() {
    let record = rev_paf();
    let window = rgn(3, 10);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=2D2=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (3, 10));
    assert_eq!((trimmed.q_st, trimmed.q_en), (2, 7));
}
/// Region (0, 12) on the reverse fixture: expects the full CIGAR and (0, 12) on both axes.
#[test]
fn rev_trim_span_all_blocks() {
    let record = rev_paf();
    let window = rgn(0, 12);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=2I3=2D4=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (0, 12));
    assert_eq!((trimmed.q_st, trimmed.q_en), (0, 12));
}
/// Region (1, 10) on the reverse fixture: expects CIGAR `2=2I3=2D2=`, target (1, 10), query (2, 11).
#[test]
fn rev_trim_both_sides() {
    let record = rev_paf();
    let window = rgn(1, 10);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "2=2I3=2D2=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (1, 10));
    assert_eq!((trimmed.q_st, trimmed.q_en), (2, 11));
}
/// Fixture: a 10-long record with strand field `+` and CIGAR `3=1X2=1X3=`.
///
/// Panics if the line cannot be parsed.
fn mismatch_paf() -> PafRecord {
    let line = "Q 10 0 10 + T 10 0 10 8 10 60 cg:Z:3=1X2=1X3=";
    PafRecord::new(line).unwrap()
}
/// Region (2, 5) on the mismatch fixture: expects CIGAR `1=1X1=` and (2, 5) on both axes.
#[test]
fn mismatch_trim_around_first_snp() {
    let record = mismatch_paf();
    let window = rgn(2, 5);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "1=1X1=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (2, 5));
    assert_eq!((trimmed.q_st, trimmed.q_en), (2, 5));
}
/// Region (3, 7) on the mismatch fixture: expects CIGAR `1X2=1X` and (3, 7) on both axes.
#[test]
fn mismatch_trim_between_snps() {
    let record = mismatch_paf();
    let window = rgn(3, 7);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "1X2=1X");
    assert_eq!((trimmed.t_st, trimmed.t_en), (3, 7));
    assert_eq!((trimmed.q_st, trimmed.q_en), (3, 7));
}
/// Fixture: a 12-long record that carries a `cs:Z::3+ga:3-cg:4` tag and no `cg` tag.
///
/// Panics if the line cannot be parsed.
fn cs_paf() -> PafRecord {
    let line = "Q 12 0 12 + T 12 0 12 10 12 60 cs:Z::3+ga:3-cg:4";
    PafRecord::new(line).unwrap()
}
/// The cs-only fixture should expose CIGAR `3=2I3=2D4=` and have `cs_ops` populated.
#[test]
fn cs_parsed_cigar_correct() {
    let record = cs_paf();
    let cigar_text = record.cigar.to_string();
    assert_eq!(cigar_text, "3=2I3=2D4=");
    assert!(record.cs_ops.is_some());
}
/// Region (0, 3) on the cs fixture: expects CIGAR `3=` and a cs string of `:3`.
#[test]
fn cs_trim_block1_preserves_cs() {
    let record = cs_paf();
    let window = rgn(0, 3);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=");
    assert!(trimmed.cs_ops.is_some());
    let rendered: String = trimmed.cs_ops.unwrap().to_cs_string();
    assert_eq!(rendered, ":3");
}
/// Region (0, 6) on the cs fixture: expects CIGAR `3=2I3=` and a cs string of `:3+ga:3`.
#[test]
fn cs_trim_span_insertion_preserves_cs() {
    let record = cs_paf();
    let window = rgn(0, 6);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=2I3=");
    assert!(trimmed.cs_ops.is_some());
    let rendered: String = trimmed.cs_ops.unwrap().to_cs_string();
    assert_eq!(rendered, ":3+ga:3");
}
/// Region (1, 10) on the cs fixture: expects CIGAR `2=2I3=2D2=` and a cs string of `:2+ga:3-cg:2`.
#[test]
fn cs_trim_partial_preserves_cs() {
    let record = cs_paf();
    let window = rgn(1, 10);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "2=2I3=2D2=");
    assert!(trimmed.cs_ops.is_some());
    let rendered: String = trimmed.cs_ops.unwrap().to_cs_string();
    assert_eq!(rendered, ":2+ga:3-cg:2");
}
/// Fixture: an 8-long record whose cs tag spells out matched bases (`=ACT*ag=ACGT`).
///
/// Panics if the line cannot be parsed.
fn cs_extended_paf() -> PafRecord {
    let line = "Q 8 0 8 + T 8 0 8 7 8 60 cs:Z:=ACT*ag=ACGT";
    PafRecord::new(line).unwrap()
}
/// The extended-cs fixture should expose CIGAR `3=1X4=` and have `cs_ops` populated.
#[test]
fn cs_extended_parsed_correctly() {
    let record = cs_extended_paf();
    let cigar_text = record.cigar.to_string();
    assert_eq!(cigar_text, "3=1X4=");
    assert!(record.cs_ops.is_some());
}
/// Region (2, 5) on the extended-cs fixture: expects CIGAR `1=1X1=` and cs string `=T*ag=A`.
#[test]
fn cs_extended_trim_cuts_matchseq() {
    let record = cs_extended_paf();
    let window = rgn(2, 5);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "1=1X1=");
    assert!(trimmed.cs_ops.is_some());
    let rendered: String = trimmed.cs_ops.unwrap().to_cs_string();
    assert_eq!(rendered, "=T*ag=A");
}
/// Region (0, 8) on the extended-cs fixture: CIGAR and cs string should equal the input's.
#[test]
fn cs_extended_trim_full_preserves() {
    let record = cs_extended_paf();
    let window = rgn(0, 8);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=1X4=");
    let rendered: String = trimmed.cs_ops.unwrap().to_cs_string();
    assert_eq!(rendered, "=ACT*ag=ACGT");
}
/// Breaking the forward fixture with threshold 0 should give three records.
///
/// Each row of `expected` is (CIGAR, target span, query span) for one piece, in order.
#[test]
fn break_fwd_at_all_indels() {
    let record = fwd_paf();
    let broken = break_paf_on_indels(&record, 0);
    assert_eq!(broken.len(), 3, "Should break into 3 records");

    let expected = [
        ("3=", (0, 3), (0, 3)),
        ("3=", (3, 6), (5, 8)),
        ("4=", (8, 12), (8, 12)),
    ];
    for (idx, &(cigar, t_span, q_span)) in expected.iter().enumerate() {
        let piece = &broken[idx];
        assert_eq!(piece.cigar.to_string(), cigar);
        assert_eq!((piece.t_st, piece.t_en), t_span);
        assert_eq!((piece.q_st, piece.q_en), q_span);
    }
}
/// Breaking the reverse fixture with threshold 0 should give three records.
///
/// Each row of `expected` is (CIGAR, target span, query span) for one piece, in order.
#[test]
fn break_rev_at_all_indels() {
    let record = rev_paf();
    let broken = break_paf_on_indels(&record, 0);
    assert_eq!(broken.len(), 3, "Should break into 3 records");

    let expected = [
        ("3=", (0, 3), (9, 12)),
        ("3=", (3, 6), (4, 7)),
        ("4=", (8, 12), (0, 4)),
    ];
    for (idx, &(cigar, t_span, q_span)) in expected.iter().enumerate() {
        let piece = &broken[idx];
        assert_eq!(piece.cigar.to_string(), cigar);
        assert_eq!((piece.t_st, piece.t_en), t_span);
        assert_eq!((piece.q_st, piece.q_en), q_span);
    }
}
/// With threshold 2 the forward fixture (indels of length 2) should come back as one record.
#[test]
fn break_no_break_when_threshold_too_high() {
    let record = fwd_paf();
    let pieces = break_paf_on_indels(&record, 2);
    assert_eq!(pieces.len(), 1, "No breaks when threshold too high");
    let only = &pieces[0];
    assert_eq!(only.cigar.to_string(), "3=2I3=2D4=");
}
/// With threshold 2 and CIGAR `3=1I3=5D4=`, two records are expected: the `1I` stays
/// inside the first piece and the split happens at the `5D`.
#[test]
fn break_selective_threshold() {
    let line = "Q 11 0 11 + T 15 0 15 10 15 60 cg:Z:3=1I3=5D4=";
    let record = PafRecord::new(line).unwrap();
    let pieces = break_paf_on_indels(&record, 2);
    assert_eq!(pieces.len(), 2, "Should break at 5D only");

    let head = &pieces[0];
    assert_eq!(head.cigar.to_string(), "3=1I3=");
    assert_eq!((head.t_st, head.t_en), (0, 6));

    let tail = &pieces[1];
    assert_eq!(tail.cigar.to_string(), "4=");
    assert_eq!((tail.t_st, tail.t_en), (11, 15));
}
/// Truncating the forward fixture to query (0, 8): expects CIGAR `3=2I3=`, target (0, 6).
#[test]
fn truncate_by_query_first_half() {
    let mut record = fwd_paf();
    record.truncate_record_by_query(0, 8);
    let cigar_text = record.cigar.to_string();
    assert_eq!(cigar_text, "3=2I3=");
    assert_eq!((record.t_st, record.t_en), (0, 6));
    assert_eq!((record.q_st, record.q_en), (0, 8));
}
/// Truncating the forward fixture to query (5, 12): expects CIGAR `3=2D4=`, target (3, 12).
#[test]
fn truncate_by_query_second_half() {
    let mut record = fwd_paf();
    record.truncate_record_by_query(5, 12);
    let cigar_text = record.cigar.to_string();
    assert_eq!(cigar_text, "3=2D4=");
    assert_eq!((record.t_st, record.t_en), (3, 12));
    assert_eq!((record.q_st, record.q_en), (5, 12));
}
/// Truncating the forward fixture to query (4, 9): the expected query start moves to 5
/// (CIGAR `3=2D1=`, target (3, 9), query (5, 9)).
#[test]
fn truncate_by_query_middle() {
    let mut record = fwd_paf();
    record.truncate_record_by_query(4, 9);
    let cigar_text = record.cigar.to_string();
    assert_eq!(cigar_text, "3=2D1=");
    assert_eq!((record.t_st, record.t_en), (3, 9));
    assert_eq!((record.q_st, record.q_en), (5, 9));
}
/// Truncating the reverse fixture to query (4, 12): expects CIGAR `3=2I3=`, target (0, 6).
#[test]
fn truncate_rev_by_query_high_end() {
    let mut record = rev_paf();
    record.truncate_record_by_query(4, 12);
    let cigar_text = record.cigar.to_string();
    assert_eq!(cigar_text, "3=2I3=");
    assert_eq!((record.t_st, record.t_en), (0, 6));
    assert_eq!((record.q_st, record.q_en), (4, 12));
}
/// Truncating the reverse fixture to query (0, 7): expects CIGAR `3=2D4=`, target (3, 12).
#[test]
fn truncate_rev_by_query_low_end() {
    let mut record = rev_paf();
    record.truncate_record_by_query(0, 7);
    let cigar_text = record.cigar.to_string();
    assert_eq!(cigar_text, "3=2D4=");
    assert_eq!((record.t_st, record.t_en), (3, 12));
    assert_eq!((record.q_st, record.q_en), (0, 7));
}
/// Forward record with offset coordinates, region (14, 15): expects query (4, 5), target (14, 15).
#[test]
fn existing_test_fwd_trim_14_15() {
    let line = "Q 10 2 10 + T 40 12 20 3 9 60 cg:Z:4M1I1=1D2=";
    let record = PafRecord::new(line).unwrap();
    let window = rgn(14, 15);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!((trimmed.q_st, trimmed.q_en), (4, 5));
    assert_eq!((trimmed.t_st, trimmed.t_en), (14, 15));
}
/// Forward record with offset coordinates, region (14, 18): expects query (4, 8),
/// target (14, 17) and CIGAR `2M1I1=`.
#[test]
fn existing_test_fwd_trim_14_18() {
    let line = "Q 10 2 10 + T 40 12 20 3 9 60 cg:Z:4M1I1=1D2=";
    let record = PafRecord::new(line).unwrap();
    let window = rgn(14, 18);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!((trimmed.q_st, trimmed.q_en), (4, 8));
    assert_eq!((trimmed.t_st, trimmed.t_en), (14, 17));
    assert_eq!(trimmed.cigar.to_string(), "2M1I1=");
}
/// Reverse record with offset coordinates, region (14, 15): expects query (7, 8), target (14, 15).
#[test]
fn existing_test_rev_trim_14_15() {
    let line = "Q 10 2 10 - T 40 12 20 3 9 60 cg:Z:4M1I1=1D2=";
    let record = PafRecord::new(line).unwrap();
    let window = rgn(14, 15);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!((trimmed.q_st, trimmed.q_en), (7, 8));
    assert_eq!((trimmed.t_st, trimmed.t_en), (14, 15));
}
/// A one-base record trimmed to region (0, 1) should come back unchanged.
#[test]
fn single_base_trim() {
    let line = "Q 1 0 1 + T 1 0 1 1 1 60 cg:Z:1=";
    let record = PafRecord::new(line).unwrap();
    let window = rgn(0, 1);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "1=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (0, 1));
    assert_eq!((trimmed.q_st, trimmed.q_en), (0, 1));
}
/// A record whose CIGAR is only `5I` should trim to `None` for region (0, 5).
///
/// The assertion runs only when the line parses; if `PafRecord::new` returns an
/// error the test passes without checking anything.
#[test]
fn all_insertion_returns_none() {
    let parsed = PafRecord::new("Q 5 0 5 + T 5 0 0 0 5 60 cg:Z:5I");
    if let Ok(record) = parsed {
        let outcome = trim_paf_rec_to_rgn_fast(&rgn(0, 5), &record);
        assert!(outcome.is_none());
    }
}
/// Record with several insertions and deletions, region (2, 11): expects CIGAR
/// `3=1D2=2D1=`, target (2, 11), query (5, 11).
#[test]
fn complex_multiple_indels() {
    let line = "Q 15 0 15 + T 14 0 14 10 14 60 cg:Z:2=3I3=1D2=2D1=1I3=";
    let record = PafRecord::new(line).unwrap();
    let window = rgn(2, 11);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!(trimmed.cigar.to_string(), "3=1D2=2D1=");
    assert_eq!((trimmed.t_st, trimmed.t_en), (2, 11));
    assert_eq!((trimmed.q_st, trimmed.q_en), (5, 11));
}
/// Breaking the cs fixture with threshold 0 should give three records, each with
/// `cs_ops` set, whose cs strings are `:3`, `:3` and `:4` in order.
#[test]
fn cs_break_preserves_cs_ops() {
    let record = cs_paf();
    let pieces = break_paf_on_indels(&record, 0);
    assert_eq!(pieces.len(), 3);
    for piece in &pieces {
        assert!(piece.cs_ops.is_some(), "cs_ops should be preserved through break-paf");
    }

    // Render all three cs strings before comparing any of them.
    let cs_of = |idx: usize| -> String { pieces[idx].cs_ops.as_ref().unwrap().to_cs_string() };
    let (first, second, third) = (cs_of(0), cs_of(1), cs_of(2));
    assert_eq!(first, ":3");
    assert_eq!(second, ":3");
    assert_eq!(third, ":4");
}
/// Fixture: a record with CIGAR `50000M30000D50000M` (query length 100000, target length 130000).
///
/// Panics if the line cannot be parsed.
fn big_del_paf() -> PafRecord {
    let line = "Q 100000 0 100000 + T 130000 0 130000 100000 130000 60 cg:Z:50000M30000D50000M";
    PafRecord::new(line).unwrap()
}
/// Region (0, 40000) on the big-deletion fixture: expects (0, 40000) on both axes and CIGAR `40000M`.
#[test]
fn trim_boundary_before_deletion() {
    let record = big_del_paf();
    let window = rgn(0, 40000);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!((trimmed.t_st, trimmed.t_en), (0, 40000));
    assert_eq!((trimmed.q_st, trimmed.q_en), (0, 40000));
    assert_eq!(trimmed.cigar.to_string(), "40000M");
}
/// Region (0, 50000) on the big-deletion fixture: expects (0, 50000) on both axes and CIGAR `50000M`.
#[test]
fn trim_boundary_at_deletion_start() {
    let record = big_del_paf();
    let window = rgn(0, 50000);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!((trimmed.t_st, trimmed.t_en), (0, 50000));
    assert_eq!((trimmed.q_st, trimmed.q_en), (0, 50000));
    assert_eq!(trimmed.cigar.to_string(), "50000M");
}
/// Region (0, 70000) ends inside the `30000D`: the expected result stops at target 50000
/// with CIGAR `50000M`.
#[test]
fn trim_boundary_inside_deletion() {
    let record = big_del_paf();
    let window = rgn(0, 70000);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!((trimmed.t_st, trimmed.t_en), (0, 50000));
    assert_eq!((trimmed.q_st, trimmed.q_en), (0, 50000));
    assert_eq!(trimmed.cigar.to_string(), "50000M");
}
/// Region (0, 80000) ends where the `30000D` ends: the expected result still stops at
/// target 50000 with CIGAR `50000M`.
#[test]
fn trim_boundary_at_deletion_end() {
    let record = big_del_paf();
    let window = rgn(0, 80000);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!((trimmed.t_st, trimmed.t_en), (0, 50000));
    assert_eq!((trimmed.q_st, trimmed.q_en), (0, 50000));
    assert_eq!(trimmed.cigar.to_string(), "50000M");
}
/// Region (0, 90000) reaches past the `30000D`: expects target (0, 90000), query (0, 60000)
/// and CIGAR `50000M30000D10000M`.
#[test]
fn trim_boundary_after_deletion() {
    let record = big_del_paf();
    let window = rgn(0, 90000);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!((trimmed.t_st, trimmed.t_en), (0, 90000));
    assert_eq!((trimmed.q_st, trimmed.q_en), (0, 60000));
    assert_eq!(trimmed.cigar.to_string(), "50000M30000D10000M");
}
/// Region (60000, 90000) starts inside the `30000D`: the expected result starts at target
/// 80000 (target (80000, 90000), query (50000, 60000), CIGAR `10000M`).
#[test]
fn trim_start_inside_deletion() {
    let record = big_del_paf();
    let window = rgn(60000, 90000);
    let trimmed = trim_paf_rec_to_rgn_fast(&window, &record).unwrap();
    assert_eq!((trimmed.t_st, trimmed.t_en), (80000, 90000));
    assert_eq!((trimmed.q_st, trimmed.q_en), (50000, 60000));
    assert_eq!(trimmed.cigar.to_string(), "10000M");
}
/// Region (55000, 75000) lies wholly within the `30000D` and is expected to yield `None`.
#[test]
fn trim_entirely_inside_deletion() {
    let record = big_del_paf();
    let window = rgn(55000, 75000);
    let outcome = trim_paf_rec_to_rgn_fast(&window, &record);
    assert!(
        outcome.is_none(),
        "Region entirely within deletion should produce no output"
    );
}