use assert_cmd::cargo::cargo_bin_cmd;
use bstr::ByteSlice;
use crate::common::{twitcher_cmd, twitcher_with_ref};
mod common;
/// Runs twitcher (through `twitcher_with_ref`) on `tests/data/{file}` with the given
/// `--padding` and asserts that at least one non-header output record contains `CIGARETS`.
///
/// The process's stderr and every data record are echoed so a failing run can be
/// diagnosed from the captured test output.
fn test_has_ts(file: &str, padding: u32) {
    let cmd = twitcher_with_ref(&format!(
        "tests/data/{file} --padding {padding} --cluster-min-records 1 -vvv --costs tests/old_test_costs.tsa"
    ));
    // Echo the log *before* checking the exit status; otherwise a failed run panics
    // without ever showing why it failed.
    eprintln!("{}", cmd.stderr.as_bstr());
    assert!(
        cmd.status.success(),
        "twitcher exited unsuccessfully for {file}"
    );

    let mut has_ts = false;
    for record in cmd
        .stdout
        .as_bstr()
        .lines()
        .filter(|l| !l.starts_with(b"#"))
    {
        println!("{}", record.as_bstr());
        has_ts |= record.contains_str(b"CIGARETS");
    }
    assert!(has_ts, "expected a CIGARETS record in the output for {file}");
}
/// The single-MNV fixture, run with a padding of 10, must yield a `CIGARETS` record.
#[test]
fn test_single_mnv() {
    let (fixture, padding) = ("single_mnv.vcf", 10);
    test_has_ts(fixture, padding);
}
/// The single-insertion fixture, run with a padding of 20, must yield a `CIGARETS` record.
#[test]
fn test_single_insertion() {
    let (fixture, padding) = ("single_insertion.vcf", 20);
    test_has_ts(fixture, padding);
}
/// Checks that the data records emitted for `sorted_output.vcf.gz` all share one
/// chromosome name and appear with non-decreasing positions.
#[test]
fn test_output_sorted() {
    let cmd = twitcher_with_ref("tests/data/sorted_output.vcf.gz --padding 20");
    // Surface the tool's log so a non-zero exit status can be diagnosed.
    eprintln!("{}", cmd.stderr.as_bstr());
    assert!(cmd.status.success());

    let mut last_chr = None;
    let mut last_pos = 0_u64;
    for record in cmd
        .stdout
        .as_bstr()
        .lines()
        .filter(|l| !l.starts_with(b"#"))
    {
        let mut tokens = record.split_str(b"\t");

        let chr = tokens.next().expect("record must have a CHROM column");
        assert!(
            last_chr.is_none_or(|last| last == chr),
            "the chr is changing for an example where it shouldn't",
        );
        last_chr = Some(chr);

        // Spell out the integer type instead of relying on the i32 literal fallback.
        let pos: u64 = tokens
            .next()
            .expect("record must have a POS column")
            .to_str()
            .expect("POS must be valid UTF-8")
            .parse()
            .expect("POS must be an unsigned integer");
        assert!(pos >= last_pos, "The output is not sorted");
        last_pos = pos;
    }
}
fn test_region_output(filename: &str, expected: &str) {
let dir = tempfile::tempdir().unwrap();
let regions = format!("{}/{filename}", dir.path().to_str().unwrap());
let mut cmd = cargo_bin_cmd!("twitcher");
cmd.args([
"vcf",
"./test_files/test.vcf",
"--reference",
"./test_files/test.fa",
"--output-regions",
®ions,
"--cluster-min-records",
"1",
"--costs",
"./tests/old_test_costs.tsa",
]);
let res = cmd.unwrap();
let stderr_str = res.stderr.as_bstr();
eprintln!("{stderr_str}");
assert!(res.status.success());
let output = std::fs::read_to_string(regions).unwrap();
assert_eq!(output, expected);
}
/// A regions file whose name does not end in `.bed` is expected to hold two
/// tab-separated rows starting at 101 and 201.
#[test]
fn test_region_output_tab() {
    let expected = concat!("chrZ\t101\t108\n", "chrZ\t201\t207\n");
    test_region_output("regions.some.ext", expected);
}
/// A regions file named `*.bed` is expected to hold two rows starting at 100 and 200.
///
/// NOTE(review): these starts are one lower than in the non-`.bed` test, presumably
/// because BED uses 0-based start coordinates; confirm against the CLI documentation.
#[test]
fn test_region_output_bed() {
    let expected = concat!("chrZ\t100\t108\n", "chrZ\t200\t207\n");
    test_region_output("regions.bed", expected);
}
/// Shared argument string for the template-switch tests: the fixture VCF path followed by
/// `--cluster-min-records 1 --padding 30`. Tests pass it to `twitcher_with_ref`, optionally
/// with extra flags appended.
const TS_FIXTURE: &str = "tests/data/ts_chr1_4304586.vcf --cluster-min-records 1 --padding 30";
/// With no `--costs` override, the shared fixture must still produce at least one
/// `CIGARETS` data record.
#[test]
fn test_ts_detection_default_costs() {
    let output = twitcher_with_ref(TS_FIXTURE);
    eprintln!("{}", output.stderr.as_bstr());
    assert!(output.status.success());

    // One pass: a line counts only if it is not a header and mentions CIGARETS.
    let found = output
        .stdout
        .as_bstr()
        .lines()
        .any(|line| !line.starts_with(b"#") && line.contains_str(b"CIGARETS"));
    assert!(found, "expected CIGARETS in output with default costs");
}
/// With `--no-ts`, no data record in the output may contain `CIGARETS`.
#[test]
fn test_no_ts_flag_suppresses_template_switches() {
    let args = format!("{TS_FIXTURE} --no-ts");
    let output = twitcher_with_ref(&args);
    eprintln!("{}", output.stderr.as_bstr());
    assert!(output.status.success());

    // Every line must be either a header or free of the CIGARETS marker.
    let ts_free = output
        .stdout
        .as_bstr()
        .lines()
        .all(|line| line.starts_with(b"#") || !line.contains_str(b"CIGARETS"));
    assert!(ts_free, "expected no CIGARETS when --no-ts is set");
}
/// With `--only-realigned`, the output must contain at least one data record and every
/// data record must carry `CIGARETS`.
#[test]
fn test_only_realigned_outputs_ts_records_only() {
    let args = format!("{TS_FIXTURE} --only-realigned");
    let output = twitcher_with_ref(&args);
    eprintln!("{}", output.stderr.as_bstr());
    assert!(output.status.success());

    let mut records = output
        .stdout
        .as_bstr()
        .lines()
        .filter(|l| !l.starts_with(b"#"))
        .peekable();
    // Peek first so an empty output is reported instead of passing vacuously.
    assert!(
        records.peek().is_some(),
        "expected at least one output record"
    );
    for record in records {
        assert!(
            record.contains_str(b"CIGARETS"),
            "non-CIGARETS record emitted with --only-realigned: {}",
            record.as_bstr()
        );
    }
}
/// With `--csv <path>`, the file must be created and contain a non-empty header line
/// followed by at least one further line.
#[test]
fn test_csv_output_created() {
    let tmp = tempfile::tempdir().unwrap();
    let csv_path = tmp.path().join("output.csv");
    let args = format!("{TS_FIXTURE} --csv {}", csv_path.to_str().unwrap());

    let output = twitcher_with_ref(&args);
    eprintln!("{}", output.stderr.as_bstr());
    assert!(output.status.success());
    assert!(csv_path.exists(), "CSV file was not created");

    let content = std::fs::read_to_string(&csv_path).unwrap();
    let mut rows = content.lines();
    let Some(header) = rows.next() else {
        panic!("CSV must have a header row");
    };
    assert!(!header.is_empty(), "CSV header must not be empty");

    let first_data_row = rows.next();
    assert!(
        first_data_row.is_some(),
        "CSV must have at least one data row for the detected TS"
    );
}
/// With `--fpa`, the shared fixture must still produce at least one `CIGARETS` data record.
#[test]
fn test_fpa_detects_template_switches() {
    let args = format!("{TS_FIXTURE} --fpa");
    let output = twitcher_with_ref(&args);
    eprintln!("{}", output.stderr.as_bstr());
    assert!(output.status.success());

    let mut found = false;
    for line in output.stdout.as_bstr().lines() {
        if !line.starts_with(b"#") && line.contains_str(b"CIGARETS") {
            found = true;
            break;
        }
    }
    assert!(found, "expected CIGARETS in output with --fpa");
}
/// Regression check: the `crash_chr4_leading_match` fixture must run to a successful
/// exit status (single thread, maximum verbosity).
#[test]
fn test_no_panic_on_null_leading_match() {
    let args = [
        "vcf",
        "tests/data/crash_chr4_leading_match.vcf.gz",
        "--reference",
        "tests/data/hg38.chr4_part.fa.gz",
        "--threads",
        "1",
        "-vvv",
    ];
    let output = twitcher_cmd(&args);
    assert!(output.status.success());
}
/// Every realigned (`CIGARETS`) record must carry `AN=2` and `AC=1` in its INFO column.
#[test]
fn test_an_ac_correct_in_realigned_records() {
    let cmd = twitcher_with_ref(TS_FIXTURE);
    // Echo the log so a non-zero exit status can be diagnosed.
    eprintln!("{}", cmd.stderr.as_bstr());
    assert!(cmd.status.success());

    let realigned: Vec<_> = cmd
        .stdout
        .as_bstr()
        .lines()
        .filter(|l| !l.starts_with(b"#") && l.contains_str(b"CIGARETS"))
        .collect();
    assert!(
        !realigned.is_empty(),
        "expected at least one realigned record"
    );

    for record in &realigned {
        let info = record
            .split_str(b"\t")
            .nth(7)
            .expect("VCF record must have INFO field");
        // Compare whole `;`-separated entries: a plain substring search for `AN=2`
        // would also accept `AN=20`, and `AC=1` would accept `AC=10` or `MAC=1`.
        let has_entry = |entry: &[u8]| info.split_str(b";").any(|field| field == entry);
        assert!(
            has_entry(b"AN=2"),
            "AN must be 2 (two called alleles from 0/1 genotype), got: {}",
            info.as_bstr()
        );
        assert!(
            has_entry(b"AC=1"),
            "AC must be 1 (one alt allele from 0/1 genotype), got: {}",
            info.as_bstr()
        );
    }
}
/// For `phased_split_cluster.vcf`, exactly two `CIGARETS` records are expected; both must
/// carry the same `CLUSTERGRP=` INFO value, and their genotypes must include both `0|1`
/// and `1|0`.
#[test]
fn test_phased_split_cluster_correlates_clustergrp() {
    const CLUSTERGRP_KEY: &[u8] = b"CLUSTERGRP=";

    let mut twitcher = cargo_bin_cmd!("twitcher");
    twitcher.args([
        "vcf",
        "./test_files/phased_split_cluster.vcf",
        "--reference",
        "./test_files/test.fa",
        "--cluster-min-records",
        "1",
        "--costs",
        "./tests/old_test_costs.tsa",
    ]);
    let output = twitcher.unwrap();
    eprintln!("{}", output.stderr.as_bstr());
    assert!(output.status.success());

    let ts_records: Vec<_> = output
        .stdout
        .as_bstr()
        .lines()
        .filter(|line| !line.starts_with(b"#"))
        .filter(|line| line.contains_str(b"CIGARETS"))
        .collect();
    assert_eq!(
        ts_records.len(),
        2,
        "expected two TS records, one per haplotype sub-cluster"
    );

    // INFO is column 7; records without the column or without the key are skipped here
    // and caught by the length assertion below.
    let mut clustergrps = Vec::new();
    for rec in &ts_records {
        let Some(info) = rec.split_str(b"\t").nth(7) else {
            continue;
        };
        let value = info
            .split_str(b";")
            .find_map(|field| field.strip_prefix(CLUSTERGRP_KEY));
        if let Some(value) = value {
            clustergrps.push(value.to_vec());
        }
    }
    assert_eq!(clustergrps.len(), 2, "both TS records must have CLUSTERGRP");
    assert_eq!(
        clustergrps[0], clustergrps[1],
        "TS records from the same proximity cluster must share CLUSTERGRP"
    );

    // The genotype is the first `:`-separated entry of the sample column (column 9).
    let mut gts = Vec::new();
    for rec in &ts_records {
        let Some(sample) = rec.split_str(b"\t").nth(9) else {
            continue;
        };
        if let Some(gt) = sample.split_str(b":").next() {
            gts.push(gt.to_vec());
        }
    }
    assert!(
        gts.iter().any(|g| g == b"0|1"),
        "hap=1 sub-cluster should produce 0|1 GT"
    );
    assert!(
        gts.iter().any(|g| g == b"1|0"),
        "hap=0 sub-cluster should produce 1|0 GT"
    );
}
/// For `multiallelic_1_2_both_realign.vcf`, two `CIGARETS` records are expected: one with
/// ALT `TTTTTTT` and GT `1|0`, the other with ALT `TTTTTT` and GT `0|1`.
#[test]
fn test_multiallelic_1_2_uses_per_haplotype_allele() {
    // The shared runner issues the same invocation this test used to spell out by hand
    // (same reference, `--cluster-min-records 1`, `--padding 20`, same costs file).
    let lines = run_vcf_data_lines("multiallelic_1_2_both_realign.vcf", &[]);

    let ts_records: Vec<_> = lines
        .iter()
        .filter(|l| l.contains_str(b"CIGARETS"))
        .collect();
    assert_eq!(ts_records.len(), 2, "expected one TS record per haplotype");

    let pairs: Vec<(Vec<u8>, Vec<u8>)> = ts_records.iter().map(|rec| alt_and_gt(rec)).collect();
    assert!(
        pairs
            .iter()
            .any(|(alt, gt)| alt == b"TTTTTTT" && gt == b"1|0"),
        "H0 must realign allele 1 (TTTTTTT) and emit 1|0, got {pairs:?}"
    );
    assert!(
        pairs
            .iter()
            .any(|(alt, gt)| alt == b"TTTTTT" && gt == b"0|1"),
        "H1 must realign allele 2 (TTTTTT) and emit 0|1, got {pairs:?}"
    );
}
/// Runs `twitcher vcf` on `./test_files/{fixture}` with the shared test reference,
/// `--cluster-min-records 1`, `--padding 20` and the old test costs, followed by any
/// `extra` arguments.
///
/// Echoes the process's stderr, asserts a successful exit status, and returns the stdout
/// lines that do not start with `#`, each as an owned byte vector.
fn run_vcf_data_lines(fixture: &str, extra: &[&str]) -> Vec<Vec<u8>> {
    let input = format!("./test_files/{fixture}");

    let mut twitcher = cargo_bin_cmd!("twitcher");
    twitcher
        .args([
            "vcf",
            input.as_str(),
            "--reference",
            "./test_files/test.fa",
            "--cluster-min-records",
            "1",
            "--padding",
            "20",
            "--costs",
            "./tests/old_test_costs.tsa",
        ])
        .args(extra);

    let output = twitcher.unwrap();
    eprintln!("{}", output.stderr.as_bstr());
    assert!(output.status.success());

    let mut data_lines = Vec::new();
    for line in output.stdout.as_bstr().lines() {
        if !line.starts_with(b"#") {
            data_lines.push(line.to_vec());
        }
    }
    data_lines
}
/// Extracts the ALT column (index 4) and the genotype from a tab-separated VCF data line.
///
/// The genotype is the first `:`-separated entry of the column at index 9.
///
/// # Panics
///
/// Panics with a descriptive message if the line has fewer than 5 or fewer than 10
/// tab-separated columns.
fn alt_and_gt(line: &[u8]) -> (Vec<u8>, Vec<u8>) {
    // Walk the columns lazily instead of collecting all of them into a Vec.
    let mut cols = line.split(|&b| b == b'\t');
    let alt = cols
        .nth(4)
        .expect("VCF record must have an ALT column (index 4)")
        .to_vec();
    // The first `nth(4)` consumed columns 0..=4, so this one lands on column 9.
    let sample = cols
        .nth(4)
        .expect("VCF record must have a sample column (index 9)");
    let gt = sample
        .split(|&b| b == b':')
        .next()
        .expect("splitting a slice always yields at least one item")
        .to_vec();
    (alt, gt)
}
/// For `multiallelic_1_2.vcf`, two data records are expected, exactly one of them a
/// `CIGARETS` record: ALT `TTTTTTT` with GT `1|0` and ALT `CCCCCCC` with GT `0|1`.
/// No record may list several ALT alleles (no `,` in the ALT column).
#[test]
fn test_partial_realign_reconstructs_other_haplotype() {
    let records = run_vcf_data_lines("multiallelic_1_2.vcf", &[]);
    assert_eq!(records.len(), 2, "one TS + one reconstructed record");

    let ts_count = records
        .iter()
        .filter(|r| r.contains_str(b"CIGARETS"))
        .count();
    assert_eq!(ts_count, 1, "exactly one TS record");

    let pairs: Vec<_> = records.iter().map(|r| alt_and_gt(r)).collect();
    let realigned = pairs.iter().any(|(a, g)| a == b"TTTTTTT" && g == b"1|0");
    assert!(realigned);
    let reconstructed = pairs.iter().any(|(a, g)| a == b"CCCCCCC" && g == b"0|1");
    assert!(reconstructed);

    // ALT is column 4; a comma there would mean a multi-allelic record slipped through.
    let any_multiallelic = records
        .iter()
        .any(|r| r.split_str(b"\t").nth(4).unwrap().contains_str(b","));
    assert!(!any_multiallelic);
}
/// For `multiallelic_1_2_no_ts.vcf`, exactly two data records are expected, none of them
/// a `CIGARETS` record: ALT `C` with GT `1|0` and ALT `G` with GT `0|1`.
#[test]
fn test_no_ts_split_cluster_emits_no_duplicates() {
    let records = run_vcf_data_lines("multiallelic_1_2_no_ts.vcf", &[]);
    assert_eq!(records.len(), 2, "two reconstructed records, no duplicates");

    let any_ts = records.iter().any(|r| r.contains_str(b"CIGARETS"));
    assert!(!any_ts);

    let pairs: Vec<_> = records.iter().map(|r| alt_and_gt(r)).collect();
    let first_hap = pairs.iter().any(|(a, g)| a == b"C" && g == b"1|0");
    assert!(first_hap);
    let second_hap = pairs.iter().any(|(a, g)| a == b"G" && g == b"0|1");
    assert!(second_hap);
}
/// For `hom_alt_2_2.vcf`, a single data record is expected, with ALT `G` and GT `1/1`.
#[test]
fn test_hom_alt_non_one_allele_emitted_once() {
    let records = run_vcf_data_lines("hom_alt_2_2.vcf", &[]);
    assert_eq!(records.len(), 1, "hom-alt emitted exactly once");

    let only_record = &records[0];
    let (alt, gt) = alt_and_gt(only_record);
    assert_eq!(alt, b"G", "must use allele 2's sequence");
    assert_eq!(gt, b"1/1");
}
/// For `phased_2_1_orientation.vcf`, two data records are expected: ALT `TTTTTT` with GT
/// `1|0` and ALT `TTTTTTT` with GT `0|1`, both with `42` as the second sample entry.
#[test]
fn test_phased_split_preserves_slot_to_haplotype_orientation() {
    let lines = run_vcf_data_lines("phased_2_1_orientation.vcf", &[]);
    assert_eq!(lines.len(), 2);

    // Each tuple is (ALT, GT, PS); GT and PS are the first two `:`-separated entries of
    // the sample column (column 9).
    let mut recs: Vec<(Vec<u8>, Vec<u8>, Vec<u8>)> = Vec::with_capacity(lines.len());
    for line in &lines {
        let cols: Vec<_> = line.split_str(b"\t").collect();
        let mut sample_entries = cols[9].split_str(b":");
        let gt = sample_entries.next().unwrap().to_vec();
        let ps = sample_entries.next().unwrap().to_vec();
        recs.push((cols[4].to_vec(), gt, ps));
    }

    assert!(
        recs.iter()
            .any(|(alt, gt, ps)| alt == b"TTTTTT" && gt == b"1|0" && ps == b"42"),
        "1|0 must carry slot-0 allele (allele 2, TTTTTT) with PS=42, got {recs:?}"
    );
    assert!(
        recs.iter()
            .any(|(alt, gt, ps)| alt == b"TTTTTTT" && gt == b"0|1" && ps == b"42"),
        "0|1 must carry slot-1 allele (allele 1, TTTTTTT) with PS=42, got {recs:?}"
    );
}
/// Running the shared fixture with `--phasing-bam tests/data/reads/test.bam` must succeed
/// and emit the same number of data records as the run without the flag.
#[test]
fn test_phasing_bam_flag_accepted_and_pipeline_succeeds() {
    // Number of stdout lines that are not `#` headers.
    let count_records = |stdout: &[u8]| {
        stdout
            .as_bstr()
            .lines()
            .filter(|l| !l.starts_with(b"#"))
            .count()
    };

    let without_bam = twitcher_with_ref(TS_FIXTURE);
    eprintln!("{}", without_bam.stderr.as_bstr());
    // Check the baseline too: if it failed, the count comparison below would only
    // report a misleading mismatch.
    assert!(
        without_bam.status.success(),
        "baseline run without --phasing-bam must succeed"
    );

    let with_bam = twitcher_with_ref(&format!(
        "{TS_FIXTURE} --phasing-bam tests/data/reads/test.bam"
    ));
    eprintln!("{}", with_bam.stderr.as_bstr());
    assert!(with_bam.status.success(), "--phasing-bam must not crash");

    let count_without = count_records(&without_bam.stdout);
    let count_with = count_records(&with_bam.stdout);
    assert_eq!(
        count_without, count_with,
        "--phasing-bam with no overlapping reads must not change record count"
    );
}
/// Restricting the run with `-t chr1:1-10` must succeed and emit no data records.
#[test]
fn test_target_region_empty_range_produces_no_records() {
    let args = format!("{TS_FIXTURE} -t chr1:1-10");
    let output = twitcher_with_ref(&args);
    eprintln!("{}", output.stderr.as_bstr());
    assert!(output.status.success());

    let mut data_count = 0_usize;
    for line in output.stdout.as_bstr().lines() {
        if !line.starts_with(b"#") {
            data_count += 1;
        }
    }
    assert_eq!(
        data_count, 0,
        "target region chr1:1-10 should yield no records"
    );
}