use std::{collections::HashSet, sync::Arc};
use rust_htslib::{
bam::{self, FetchDefinition, Read as _},
bcf::{self, record::GenotypeAllele},
};
use tracing::warn;
use crate::{
common::coords::GenomeRegion, counter, vcf::pipeline::clusterizer::phasing::GtAndPhase,
};
#[derive(clap::Args, Debug, Clone)]
pub struct PhasingSettings {
#[arg(long = "phasing-min-reads", default_value_t = 3)]
pub min_reads: u32,
#[arg(long = "phasing-confidence", default_value_t = 0.8)]
pub confidence: f64,
#[arg(long = "phasing-min-mapq", default_value_t = 20)]
pub min_mapq: u8,
#[arg(long = "phasing-min-base-qual", default_value_t = 20)]
pub min_base_qual: u8,
}
impl Default for PhasingSettings {
fn default() -> Self {
Self {
min_reads: 3,
confidence: 0.8,
min_mapq: 20,
min_base_qual: 20,
}
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Orientation {
Direct,
Flipped,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Decision {
Resolved(Orientation),
NoEvidence,
InsufficientReads,
LowConfidence,
}
fn record_outcome(decision: Decision) -> Option<Orientation> {
match decision {
Decision::Resolved(o) => {
counter!("phasing.records.resolved").inc(1);
Some(o)
}
Decision::NoEvidence => {
counter!("phasing.records.unresolved.no_evidence").inc(1);
None
}
Decision::InsufficientReads => {
counter!("phasing.records.unresolved.insufficient_reads").inc(1);
None
}
Decision::LowConfidence => {
counter!("phasing.records.unresolved.low_confidence").inc(1);
None
}
}
}
struct NodeInfo {
pos: i64,
slots: [u32; 2],
alleles: Vec<Vec<u8>>,
}
impl NodeInfo {
fn slot_label(&self, read: &bam::Record, min_base_qual: u8) -> Option<(bool, bool)> {
let refs: Vec<&[u8]> = self.alleles.iter().map(Vec::as_slice).collect();
let idx = read_allele_index_at_pos(read, self.pos, &refs, min_base_qual)?;
if idx == self.slots[0] {
Some((false, idx == 0))
} else if idx == self.slots[1] {
Some((true, idx == 0))
} else {
None
}
}
}
#[derive(Debug)]
#[allow(dead_code)]
pub enum BamPhaseResolverError {
MultiplePhaseSets(HashSet<i32>),
BamReadError(rust_htslib::errors::Error),
Other(anyhow::Error),
}
pub fn resolve_phasing(
records: &mut [Arc<bcf::Record>],
region: &GenomeRegion,
reader: &mut bam::IndexedReader,
settings: &PhasingSettings,
) -> Result<(), BamPhaseResolverError> {
let unphased_indices: Vec<usize> = records
.iter()
.enumerate()
.filter_map(|(i, rec)| {
let gtp = GtAndPhase::from(&**rec);
(gtp.is_unphased_het() && !gtp.has_missing()).then_some(i)
})
.collect();
if unphased_indices.is_empty() {
return Ok(());
}
let anchors: Vec<(usize, GtAndPhase)> = records
.iter()
.enumerate()
.filter_map(|(i, rec)| {
let gtp = GtAndPhase::from(&**rec);
if gtp.phase.is_some() && !gtp.is_hom() {
Some((i, gtp))
} else {
None
}
})
.collect();
let distinct_ps: HashSet<i32> = anchors.iter().map(|(_, ps)| ps.phase.unwrap()).collect();
if distinct_ps.len() > 1 {
return Err(BamPhaseResolverError::MultiplePhaseSets(distinct_ps));
}
let node_indices: Vec<usize> = anchors
.iter()
.map(|(i, _)| *i)
.chain(unphased_indices.iter().copied())
.collect();
let n = node_indices.len();
let anchor_count = anchors.len();
let node_info: Vec<NodeInfo> = node_indices
.iter()
.map(|&ri| {
let rec = &*records[ri];
let gtp = GtAndPhase::from(rec);
NodeInfo {
pos: rec.pos(),
slots: [gtp.alleles[0].unwrap(), gtp.alleles[1].unwrap()],
alleles: rec.alleles().iter().map(|a| a.to_vec()).collect(),
}
})
.collect();
let mut same = vec![vec![0i32; n]; n];
let mut diff = vec![vec![0i32; n]; n];
let fd = match FetchDefinition::try_from(region) {
Ok(fd) => fd,
Err(e) => {
warn!("local_phasing: cannot build fetch definition: {e}");
return Err(BamPhaseResolverError::Other(e));
}
};
if let Err(e) = reader.fetch(fd) {
warn!("local_phasing: fetch failed: {e}");
return Err(BamPhaseResolverError::BamReadError(e));
}
let mut bam_rec = bam::Record::new();
while let Some(result) = reader.read(&mut bam_rec) {
if result.is_err() {
warn!("local_phasing: BAM read error; stopping scan for this cluster");
break;
}
if bam_rec.is_unmapped()
|| bam_rec.is_secondary()
|| bam_rec.is_supplementary()
|| bam_rec.is_duplicate()
|| bam_rec.is_quality_check_failed()
|| bam_rec.mapq() < settings.min_mapq
{
continue;
}
let support: Vec<Option<(bool, bool)>> = node_info
.iter()
.map(|info| info.slot_label(&bam_rec, settings.min_base_qual))
.collect();
for i in 0..n {
let Some((si, ri)) = support[i] else { continue };
for j in (i + 1)..n {
let Some((sj, rj)) = support[j] else { continue };
if si == sj {
if ri && rj {
continue;
}
same[i][j] += 1;
same[j][i] += 1;
} else {
diff[i][j] += 1;
diff[j][i] += 1;
}
}
}
}
let (assigned_phaseset, has_anchors) = if anchors.is_empty() {
#[allow(clippy::cast_possible_truncation)]
let ps = records.first().map_or(0, |r| (r.pos() + 1) as i32);
(ps, false)
} else {
(anchors[0].1.phase.unwrap(), true)
};
let unresolved_count = unphased_indices.len();
let mut assignment: Vec<Option<Orientation>> = vec![None; unresolved_count];
let decide = |v_direct: i32, v_flipped: i32| -> Decision {
let total = (v_direct + v_flipped).max(0) as u32;
if total == 0 {
return Decision::NoEvidence;
}
let (winner, winner_count) = if v_direct >= v_flipped {
(Orientation::Direct, v_direct as u32)
} else {
(Orientation::Flipped, v_flipped as u32)
};
if winner_count < settings.min_reads {
return Decision::InsufficientReads;
}
let fraction = f64::from(winner_count) / f64::from(total);
if fraction < settings.confidence {
return Decision::LowConfidence;
}
Decision::Resolved(winner)
};
for k in 0..unresolved_count {
let node_k = anchor_count + k;
if has_anchors {
let mut v_direct = 0i32;
let mut v_flipped = 0i32;
for a_node in 0..anchor_count {
v_direct += same[a_node][node_k];
v_flipped += diff[a_node][node_k];
}
assignment[k] = record_outcome(decide(v_direct, v_flipped));
} else {
if k == 0 {
let max_pair_cov: u32 = (0..n)
.filter(|&j| j != node_k)
.map(|j| (same[node_k][j] + diff[node_k][j]).max(0) as u32)
.max()
.unwrap_or(0);
let decision = if max_pair_cov == 0 {
Decision::NoEvidence
} else if max_pair_cov < settings.min_reads {
Decision::InsufficientReads
} else {
Decision::Resolved(Orientation::Direct)
};
assignment[k] = record_outcome(decision);
continue;
}
let mut v_direct = 0i32;
let mut v_flipped = 0i32;
for (j, item) in assignment.iter().enumerate().take(k) {
let node_j = anchor_count + j;
match item {
Some(Orientation::Direct) => {
v_direct += same[node_j][node_k];
v_flipped += diff[node_j][node_k];
}
Some(Orientation::Flipped) => {
v_flipped += same[node_j][node_k];
v_direct += diff[node_j][node_k];
}
None => {}
}
}
assignment[k] = record_outcome(decide(v_direct, v_flipped));
}
}
for (k, &rec_idx) in unphased_indices.iter().enumerate() {
let Some(orientation) = assignment[k] else {
continue;
};
let [g0, g1] = node_info[anchor_count + k].slots;
let (new_a0, new_a1): (u32, u32) = match orientation {
Orientation::Direct => (g0, g1),
Orientation::Flipped => (g1, g0),
};
let mut new_rec = (*records[rec_idx]).clone();
if let Err(e) = new_rec.push_genotypes(&[
GenotypeAllele::Unphased(new_a0 as i32),
GenotypeAllele::Phased(new_a1 as i32),
]) {
warn!(
"local_phasing: failed to update GT for record at {}: {e}",
new_rec.pos()
);
continue;
}
if let Err(e) = new_rec.push_format_integer(b"PS", &[assigned_phaseset]) {
warn!(
"local_phasing: failed to set PS for record at {}: {e}",
new_rec.pos()
);
continue;
}
records[rec_idx] = Arc::new(new_rec);
}
Ok(())
}
fn read_allele_at_pos(
read: &bam::Record,
vcf_pos: i64,
ref_allele: &[u8],
alt_allele: &[u8],
min_base_qual: u8,
) -> Option<bool> {
use rust_htslib::bam::record::Cigar;
let (vcf_pos, ref_allele, alt_allele) =
if !ref_allele.is_empty() && !alt_allele.is_empty() && ref_allele[0] == alt_allele[0] {
(vcf_pos + 1, &ref_allele[1..], &alt_allele[1..])
} else {
(vcf_pos, ref_allele, alt_allele)
};
if vcf_pos < read.pos() {
return None;
}
let cigar = read.cigar();
let seq = read.seq();
let qual = read.qual();
let mut ref_pos: i64 = read.pos();
let mut query_pos: usize = 0;
for op in &cigar {
match op {
Cigar::Match(raw_len) | Cigar::Equal(raw_len) | Cigar::Diff(raw_len) => {
let op_len = i64::from(*raw_len);
if ref_pos + op_len > vcf_pos {
let offset = (vcf_pos - ref_pos) as usize;
return match (ref_allele.len(), alt_allele.len()) {
(_, 0) | (0, _) => {
if qual
.get(query_pos + offset)
.is_some_and(|&q| q < min_base_qual)
{
return None;
}
Some(false)
}
(rlen, alen) if rlen == alen => {
let avail = (*raw_len as usize).saturating_sub(offset);
if rlen > avail {
return None; }
let qstart = query_pos + offset;
if (0..rlen)
.any(|i| qual.get(qstart + i).is_some_and(|&q| q < min_base_qual))
{
return None; }
let mut matches_alt = true;
let mut matches_ref = true;
for i in 0..rlen {
let base_uc = seq[qstart + i].to_ascii_uppercase();
if base_uc != alt_allele[i].to_ascii_uppercase() {
matches_alt = false;
}
if base_uc != ref_allele[i].to_ascii_uppercase() {
matches_ref = false;
}
}
match (matches_alt, matches_ref) {
(true, _) => Some(true),
(false, true) => Some(false),
_ => None,
}
}
_ => None, };
}
ref_pos += op_len;
query_pos += *raw_len as usize;
}
Cigar::Ins(ins_len) => {
if ref_allele.is_empty() && !alt_allele.is_empty() && ref_pos == vcf_pos {
let il = *ins_len as usize;
if il == alt_allele.len()
&& (0..il).all(|i| seq[query_pos + i].eq_ignore_ascii_case(&alt_allele[i]))
{
if (0..il)
.any(|i| qual.get(query_pos + i).is_some_and(|&q| q < min_base_qual))
{
return None; }
return Some(true);
}
return None;
}
query_pos += *ins_len as usize;
}
Cigar::Del(del_len) => {
let dl = i64::from(*del_len);
if !ref_allele.is_empty() && alt_allele.is_empty() {
if ref_pos == vcf_pos && dl == ref_allele.len() as i64 {
return Some(true);
} else if ref_pos <= vcf_pos && vcf_pos < ref_pos + dl {
return None;
}
} else if ref_pos <= vcf_pos && vcf_pos < ref_pos + dl {
return None;
}
ref_pos += dl;
}
Cigar::SoftClip(sc_len) => {
query_pos += *sc_len as usize;
}
Cigar::RefSkip(rs_len) => {
ref_pos += i64::from(*rs_len);
}
Cigar::HardClip(_) | Cigar::Pad(_) => {}
}
if ref_pos > vcf_pos {
break;
}
}
None
}
fn read_allele_index_at_pos(
read: &bam::Record,
vcf_pos: i64,
alleles: &[&[u8]],
min_base_qual: u8,
) -> Option<u32> {
if alleles.len() < 2 {
return None;
}
let mut ref_seen = false;
for (i, alt) in alleles.iter().enumerate().skip(1) {
match read_allele_at_pos(read, vcf_pos, alleles[0], alt, min_base_qual) {
#[allow(clippy::cast_possible_truncation)]
Some(true) => return Some(i as u32), Some(false) => ref_seen = true, None => {}
}
}
ref_seen.then_some(0)
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use rust_htslib::bcf::record::GenotypeAllele;
use rust_htslib::{
bam::{self, record::CigarString},
bcf::{self, Header, Writer},
};
use super::super::phasing::Haplotype;
use super::*;
use crate::common::{contig::ContigName, coords::GenomePosition};
fn hap_of(gtp: &GtAndPhase) -> Option<Haplotype> {
match gtp.alleles {
[Some(a), _] if a > 0 => Some(Haplotype::H0),
[Some(0), Some(b)] if b > 0 => Some(Haplotype::H1),
_ => None,
}
}
fn make_vcf_header() -> Header {
let mut h = Header::new();
h.push_record(b"##contig=<ID=chr1,length=1000000>");
h.push_record(b"##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
h.push_record(b"##FORMAT=<ID=PS,Number=1,Type=Integer,Description=\"Phase set\">");
h.push_sample(b"S1");
h
}
fn make_unphased_het(writer: &Writer, pos: i64, ref_a: &[u8], alt_a: &[u8]) -> bcf::Record {
let mut rec = writer.empty_record();
let rid = rec.header().name2rid(b"chr1").unwrap();
rec.set_rid(Some(rid));
rec.set_pos(pos);
rec.set_alleles(&[ref_a, alt_a]).unwrap();
rec.push_genotypes(&[GenotypeAllele::Unphased(0), GenotypeAllele::Unphased(1)])
.unwrap();
rec
}
fn make_phased_het(
writer: &Writer,
pos: i64,
ref_a: &[u8],
alt_a: &[u8],
hap: Haplotype,
ps: i32,
) -> bcf::Record {
let (a0, a1) = if hap == Haplotype::H1 {
(GenotypeAllele::Unphased(0), GenotypeAllele::Phased(1))
} else {
(GenotypeAllele::Unphased(1), GenotypeAllele::Phased(0))
};
let mut rec = writer.empty_record();
let rid = rec.header().name2rid(b"chr1").unwrap();
rec.set_rid(Some(rid));
rec.set_pos(pos);
rec.set_alleles(&[ref_a, alt_a]).unwrap();
rec.push_genotypes(&[a0, a1]).unwrap();
rec.push_format_integer(b"PS", &[ps]).unwrap();
rec
}
fn make_phased_het_multi(
writer: &Writer,
pos: i64,
alleles: &[&[u8]],
gt: (u32, u32),
ps: i32,
) -> bcf::Record {
let mut rec = writer.empty_record();
let rid = rec.header().name2rid(b"chr1").unwrap();
rec.set_rid(Some(rid));
rec.set_pos(pos);
rec.set_alleles(alleles).unwrap();
rec.push_genotypes(&[
GenotypeAllele::Unphased(gt.0 as i32),
GenotypeAllele::Phased(gt.1 as i32),
])
.unwrap();
rec.push_format_integer(b"PS", &[ps]).unwrap();
rec
}
fn make_bam_record(pos: i64, seq: &[u8], cigar: CigarString) -> bam::Record {
make_bam_record_q(pos, seq, cigar, &vec![60u8; seq.len()])
}
fn make_bam_record_q(pos: i64, seq: &[u8], cigar: CigarString, quals: &[u8]) -> bam::Record {
let mut rec = bam::Record::new();
rec.set_pos(pos);
rec.set_tid(0);
rec.unset_unmapped();
rec.set(b"read1", Some(&cigar), seq, quals);
rec.set_mapq(60);
rec.cache_cigar();
rec
}
fn make_bam_header() -> bam::Header {
let mut header = bam::Header::new();
let mut sq = bam::header::HeaderRecord::new(b"SQ");
sq.push_tag(b"SN", "chr1");
sq.push_tag(b"LN", 1_000_000i32);
header.push_record(&sq);
header
}
fn write_bam_and_index(mut records: Vec<bam::Record>, path: &std::path::Path) {
let header = make_bam_header();
let mut writer = bam::Writer::from_path(path, &header, bam::Format::Bam).unwrap();
for rec in &mut records {
rec.set_tid(0);
writer.write(rec).unwrap();
}
drop(writer);
bam::index::build(path, None, bam::index::Type::Bai, 1).unwrap();
}
fn make_indexed_reader(path: &std::path::Path) -> bam::IndexedReader {
bam::IndexedReader::from_path(path).unwrap()
}
fn make_region(pos: usize, len: usize) -> GenomeRegion {
GenomeRegion::new_bounded(GenomePosition::new_0(ContigName::new(b"chr1"), pos), len)
}
fn lenient_settings() -> PhasingSettings {
PhasingSettings {
min_reads: 1,
confidence: 0.6,
..PhasingSettings::default()
}
}
#[test]
fn test_allele_support_snv_alt() {
use rust_htslib::bam::record::Cigar;
let rec = make_bam_record(100, b"TTTTT", CigarString(vec![Cigar::Match(5)]));
assert_eq!(read_allele_at_pos(&rec, 102, b"A", b"T", 0), Some(true));
}
#[test]
fn test_allele_support_snv_ref() {
use rust_htslib::bam::record::Cigar;
let rec = make_bam_record(100, b"AAAAA", CigarString(vec![Cigar::Match(5)]));
assert_eq!(read_allele_at_pos(&rec, 102, b"A", b"T", 0), Some(false));
}
#[test]
fn test_allele_support_snv_no_cover() {
use rust_htslib::bam::record::Cigar;
let rec = make_bam_record(100, b"AAAAA", CigarString(vec![Cigar::Match(5)]));
assert_eq!(read_allele_at_pos(&rec, 200, b"A", b"T", 0), None);
}
#[test]
fn test_allele_support_mnv_alt() {
use rust_htslib::bam::record::Cigar;
let rec = make_bam_record(100, b"AAGCAA", CigarString(vec![Cigar::Match(6)]));
assert_eq!(read_allele_at_pos(&rec, 102, b"AT", b"GC", 0), Some(true));
}
#[test]
fn test_allele_support_mnv_ref() {
use rust_htslib::bam::record::Cigar;
let rec = make_bam_record(100, b"AAATAA", CigarString(vec![Cigar::Match(6)]));
assert_eq!(read_allele_at_pos(&rec, 102, b"AT", b"GC", 0), Some(false));
}
#[test]
fn test_allele_support_deletion_alt() {
use rust_htslib::bam::record::Cigar;
let rec = make_bam_record(
100,
b"AA",
CigarString(vec![Cigar::Match(1), Cigar::Del(1), Cigar::Match(1)]),
);
assert_eq!(read_allele_at_pos(&rec, 100, b"AT", b"A", 0), Some(true));
}
#[test]
fn test_allele_support_insertion_alt() {
use rust_htslib::bam::record::Cigar;
let rec = make_bam_record(
100,
b"ATB",
CigarString(vec![Cigar::Match(1), Cigar::Ins(1), Cigar::Match(1)]),
);
assert_eq!(read_allele_at_pos(&rec, 100, b"A", b"AT", 0), Some(true));
}
#[test]
fn test_allele_support_deletion_ref() {
use rust_htslib::bam::record::Cigar;
let rec = make_bam_record(100, b"AA", CigarString(vec![Cigar::Match(2)]));
assert_eq!(read_allele_at_pos(&rec, 100, b"AT", b"A", 0), Some(false));
}
#[test]
fn test_allele_support_insertion_ref() {
use rust_htslib::bam::record::Cigar;
let rec = make_bam_record(100, b"AA", CigarString(vec![Cigar::Match(2)]));
assert_eq!(read_allele_at_pos(&rec, 100, b"A", b"AT", 0), Some(false));
}
#[test]
fn test_allele_support_deletion_read_starts_at_trimmed_pos() {
use rust_htslib::bam::record::Cigar;
let rec = make_bam_record(101, b"A", CigarString(vec![Cigar::Del(1), Cigar::Match(1)]));
assert_eq!(read_allele_at_pos(&rec, 100, b"AT", b"A", 0), Some(true));
}
#[test]
fn test_allele_support_ref_read_starts_at_trimmed_pos() {
use rust_htslib::bam::record::Cigar;
let rec = make_bam_record(101, b"TA", CigarString(vec![Cigar::Match(2)]));
assert_eq!(read_allele_at_pos(&rec, 100, b"AT", b"A", 0), Some(false));
}
#[test]
fn test_resolve_no_unphased_het() {
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let r1 = make_phased_het(&w, 100, b"A", b"T", Haplotype::H1, 100);
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(r1)];
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(vec![], &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
resolve_phasing(
&mut input,
®ion,
&mut reader,
&PhasingSettings::default(),
)
.unwrap();
assert_eq!(input.len(), 1);
let gtp = GtAndPhase::from(&*input[0]);
assert!(gtp.phase.is_some(), "anchor should remain phased");
assert_eq!(hap_of(>p), Some(Haplotype::H1));
}
#[test]
fn test_resolve_two_unphased_hets_no_coverage() {
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let r1 = make_unphased_het(&w, 100, b"A", b"T");
let r2 = make_unphased_het(&w, 110, b"G", b"C");
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(r1), Arc::new(r2)];
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(vec![], &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
let before = counter!("phasing.records.unresolved.no_evidence").get();
resolve_phasing(
&mut input,
®ion,
&mut reader,
&PhasingSettings::default(),
)
.unwrap();
assert!(GtAndPhase::from(&*input[0]).is_unphased_het());
assert!(GtAndPhase::from(&*input[1]).is_unphased_het());
assert_eq!(
counter!("phasing.records.unresolved.no_evidence").get() - before,
2
);
}
#[test]
fn test_resolve_two_unphased_hets_same_hap() {
use rust_htslib::bam::record::Cigar;
let mut seq = vec![b'T'];
seq.extend(b"AAAAAAAAA"); seq.push(b'C'); seq.extend(b"AAAAAAAAA"); let cigar = CigarString(vec![Cigar::Match(20)]);
let read1 = make_bam_record(100, &seq, cigar.clone());
let read2 = make_bam_record(100, &seq, cigar);
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let r1 = make_unphased_het(&w, 100, b"A", b"T");
let r2 = make_unphased_het(&w, 110, b"G", b"C");
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(r1), Arc::new(r2)];
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(vec![read1, read2], &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
let before = counter!("phasing.records.resolved").get();
resolve_phasing(&mut input, ®ion, &mut reader, &lenient_settings()).unwrap();
let gtp0 = GtAndPhase::from(&*input[0]);
let gtp1 = GtAndPhase::from(&*input[1]);
assert!(gtp0.phase.is_some(), "r1 should be resolved");
assert!(gtp1.phase.is_some(), "r2 should be resolved");
assert_eq!(
hap_of(>p0),
hap_of(>p1),
"both records should be on the same haplotype"
);
assert_eq!(counter!("phasing.records.resolved").get() - before, 2);
}
#[test]
fn test_resolve_two_unphased_hets_opposite_haps() {
use rust_htslib::bam::record::Cigar;
let mut s0 = vec![b'T'];
s0.extend(b"AAAAAAAAA");
s0.push(b'G'); s0.extend(b"AAAAAAAAA");
let mut s1 = vec![b'A']; s1.extend(b"AAAAAAAAA");
s1.push(b'C'); s1.extend(b"AAAAAAAAA");
let cigar = CigarString(vec![Cigar::Match(20)]);
let reads = vec![
make_bam_record(100, &s0, cigar.clone()),
make_bam_record(100, &s0, cigar.clone()),
make_bam_record(100, &s1, cigar.clone()),
make_bam_record(100, &s1, cigar),
];
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let r1 = make_unphased_het(&w, 100, b"A", b"T");
let r2 = make_unphased_het(&w, 110, b"G", b"C");
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(r1), Arc::new(r2)];
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(reads, &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
resolve_phasing(&mut input, ®ion, &mut reader, &lenient_settings()).unwrap();
let gtp0 = GtAndPhase::from(&*input[0]);
let gtp1 = GtAndPhase::from(&*input[1]);
assert!(
gtp0.phase.is_some() && gtp1.phase.is_some(),
"both should be resolved"
);
assert_ne!(
hap_of(>p0),
hap_of(>p1),
"records should be on opposite haplotypes"
);
}
#[test]
fn test_resolve_below_min_reads_threshold() {
use rust_htslib::bam::record::Cigar;
let mut seq = vec![b'T'];
seq.extend(b"AAAAAAAAA");
seq.push(b'C');
seq.extend(b"AAAAAAAAA");
let cigar = CigarString(vec![Cigar::Match(20)]);
let read1 = make_bam_record(100, &seq, cigar);
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let r1 = make_unphased_het(&w, 100, b"A", b"T");
let r2 = make_unphased_het(&w, 110, b"G", b"C");
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(r1), Arc::new(r2)];
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(vec![read1], &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
let settings = PhasingSettings {
min_reads: 3,
confidence: 0.0,
..PhasingSettings::default()
};
let before = counter!("phasing.records.unresolved.insufficient_reads").get();
resolve_phasing(&mut input, ®ion, &mut reader, &settings).unwrap();
assert_eq!(
counter!("phasing.records.unresolved.insufficient_reads").get() - before,
1
);
assert!(
GtAndPhase::from(&*input[1]).is_unphased_het(),
"second record should remain unphased with only 1 read < min_reads=3"
);
assert!(
GtAndPhase::from(&*input[0]).is_unphased_het(),
"seed (first) record should also remain unphased: 1 read < min_reads=3"
);
}
#[test]
fn test_resolve_below_confidence_threshold() {
use rust_htslib::bam::record::Cigar;
let mut s_same = vec![b'T'];
s_same.extend(b"AAAAAAAAA");
s_same.push(b'C');
s_same.extend(b"AAAAAAAAA");
let mut s_diff = vec![b'T'];
s_diff.extend(b"AAAAAAAAA");
s_diff.push(b'G'); s_diff.extend(b"AAAAAAAAA");
let cigar = CigarString(vec![Cigar::Match(20)]);
let reads = vec![
make_bam_record(100, &s_same, cigar.clone()),
make_bam_record(100, &s_same, cigar.clone()),
make_bam_record(100, &s_diff, cigar.clone()),
make_bam_record(100, &s_diff, cigar),
];
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let r1 = make_unphased_het(&w, 100, b"A", b"T");
let r2 = make_unphased_het(&w, 110, b"G", b"C");
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(r1), Arc::new(r2)];
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(reads, &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
let settings = PhasingSettings {
min_reads: 1,
confidence: 0.8,
..PhasingSettings::default()
};
let before = counter!("phasing.records.unresolved.low_confidence").get();
resolve_phasing(&mut input, ®ion, &mut reader, &settings).unwrap();
assert_eq!(
counter!("phasing.records.unresolved.low_confidence").get() - before,
1
);
assert!(
GtAndPhase::from(&*input[1]).is_unphased_het(),
"second record should remain unphased below confidence threshold (50% < 0.8)"
);
}
#[test]
fn test_resolve_unphased_with_phased_anchor() {
use rust_htslib::bam::record::Cigar;
let mut seq = vec![b'T']; seq.extend(b"AAAAAAAAA");
seq.push(b'C'); seq.extend(b"AAAAAAAAA");
let cigar = CigarString(vec![Cigar::Match(20)]);
let read1 = make_bam_record(100, &seq, cigar.clone());
let read2 = make_bam_record(100, &seq, cigar);
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let r1 = make_unphased_het(&w, 100, b"A", b"T");
let r2 = make_phased_het(&w, 110, b"G", b"C", Haplotype::H1, 100);
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(r1), Arc::new(r2)];
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(vec![read1, read2], &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
resolve_phasing(&mut input, ®ion, &mut reader, &lenient_settings()).unwrap();
let gtp0 = GtAndPhase::from(&*input[0]);
assert!(gtp0.phase.is_some(), "UnphasedHet should be resolved");
assert_eq!(
hap_of(>p0),
Some(Haplotype::H1),
"should resolve to same hap as anchor"
);
let gtp1 = GtAndPhase::from(&*input[1]);
assert!(gtp1.phase.is_some(), "anchor should remain phased");
assert_eq!(hap_of(>p1), Some(Haplotype::H1));
}
#[test]
fn test_resolve_refref_only_does_not_phase() {
use rust_htslib::bam::record::Cigar;
let mut seq = vec![b'A']; seq.extend(b"AAAAAAAAA"); seq.push(b'G'); seq.extend(b"AAAAAAAAA"); let cigar = CigarString(vec![Cigar::Match(20)]);
let reads: Vec<bam::Record> = (0..5)
.map(|_| make_bam_record(100, &seq, cigar.clone()))
.collect();
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let anchor = make_phased_het(&w, 100, b"A", b"T", Haplotype::H1, 100);
let unphased = make_unphased_het(&w, 110, b"G", b"C");
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(anchor), Arc::new(unphased)];
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(reads, &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
let before = counter!("phasing.records.unresolved.no_evidence").get();
resolve_phasing(&mut input, ®ion, &mut reader, &lenient_settings()).unwrap();
assert!(
GtAndPhase::from(&*input[1]).is_unphased_het(),
"REF/REF-only evidence must not phase the record"
);
assert_eq!(
counter!("phasing.records.unresolved.no_evidence").get() - before,
1
);
}
#[test]
fn test_resolve_refref_reads_do_not_inflate() {
use rust_htslib::bam::record::Cigar;
let cigar = CigarString(vec![Cigar::Match(20)]);
let mut alt = vec![b'T'];
alt.extend(b"AAAAAAAAA");
alt.push(b'C');
alt.extend(b"AAAAAAAAA");
let mut refr = vec![b'A'];
refr.extend(b"AAAAAAAAA");
refr.push(b'G');
refr.extend(b"AAAAAAAAA");
let mut reads: Vec<bam::Record> = (0..3)
.map(|_| make_bam_record(100, &alt, cigar.clone()))
.collect();
reads.extend((0..5).map(|_| make_bam_record(100, &refr, cigar.clone())));
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let anchor = make_phased_het(&w, 100, b"A", b"T", Haplotype::H1, 100);
let unphased = make_unphased_het(&w, 110, b"G", b"C");
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(anchor), Arc::new(unphased)];
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(reads, &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
let settings = PhasingSettings {
min_reads: 3,
confidence: 0.8,
..PhasingSettings::default()
};
resolve_phasing(&mut input, ®ion, &mut reader, &settings).unwrap();
let gtp = GtAndPhase::from(&*input[1]);
assert!(
gtp.phase.is_some(),
"genuine ALT/ALT linkage should still resolve"
);
assert_eq!(hap_of(>p), Some(Haplotype::H1));
}
#[test]
fn test_resolve_low_mapq_reads_ignored() {
use rust_htslib::bam::record::Cigar;
let mut seq = vec![b'T']; seq.extend(b"AAAAAAAAA");
seq.push(b'C'); seq.extend(b"AAAAAAAAA");
let cigar = CigarString(vec![Cigar::Match(20)]);
let reads: Vec<bam::Record> = (0..5)
.map(|_| {
let mut r = make_bam_record(100, &seq, cigar.clone());
r.set_mapq(5);
r
})
.collect();
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let anchor = make_phased_het(&w, 100, b"A", b"T", Haplotype::H1, 100);
let unphased = make_unphased_het(&w, 110, b"G", b"C");
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(anchor), Arc::new(unphased)];
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(reads, &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
resolve_phasing(&mut input, ®ion, &mut reader, &lenient_settings()).unwrap();
assert!(
GtAndPhase::from(&*input[1]).is_unphased_het(),
"reads below the MAPQ floor must not phase the record"
);
}
#[test]
fn test_resolve_low_base_quality_at_variant_ignored() {
use rust_htslib::bam::record::Cigar;
let mut seq = vec![b'T']; seq.extend(b"AAAAAAAAA");
seq.push(b'C'); seq.extend(b"AAAAAAAAA");
let cigar = CigarString(vec![Cigar::Match(20)]);
let mut quals = vec![60u8; seq.len()];
quals[10] = 5; let reads: Vec<bam::Record> = (0..5)
.map(|_| make_bam_record_q(100, &seq, cigar.clone(), &quals))
.collect();
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let anchor = make_phased_het(&w, 100, b"A", b"T", Haplotype::H1, 100);
let unphased = make_unphased_het(&w, 110, b"G", b"C");
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(anchor), Arc::new(unphased)];
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(reads, &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
resolve_phasing(&mut input, ®ion, &mut reader, &lenient_settings()).unwrap();
assert!(
GtAndPhase::from(&*input[1]).is_unphased_het(),
"a low-quality base at the variant must not phase the record"
);
}
#[test]
fn test_resolve_multiple_phaseset_early_exit() {
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let r1 = make_phased_het(&w, 100, b"A", b"T", Haplotype::H1, 100);
let r2 = make_phased_het(&w, 110, b"G", b"C", Haplotype::H0, 200); let r3 = make_unphased_het(&w, 120, b"C", b"G");
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(r1), Arc::new(r2), Arc::new(r3)];
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(vec![], &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 70);
let err = resolve_phasing(
&mut input,
®ion,
&mut reader,
&PhasingSettings::default(),
)
.unwrap_err();
assert!(
matches!(err, BamPhaseResolverError::MultiplePhaseSets(_)),
"anchors spanning two distinct phasesets should be rejected"
);
assert!(
GtAndPhase::from(&*input[2]).is_unphased_het(),
"multiple PSs should cause early exit leaving UnphasedHet unchanged"
);
}
#[test]
fn test_resolve_2_1_anchor_correct_direction() {
use rust_htslib::bam::record::Cigar;
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let anchor = make_phased_het_multi(&w, 100, &[b"A", b"T", b"C"], (2, 1), 100);
let unresolved = make_unphased_het(&w, 110, b"G", b"A");
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(anchor), Arc::new(unresolved)];
let mut seq = vec![b'T'];
seq.extend(b"AAAAAAAAA"); seq.push(b'A'); let cigar = CigarString(vec![Cigar::Match(seq.len() as u32)]);
let reads: Vec<bam::Record> = (0..3)
.map(|_| make_bam_record(100, &seq, cigar.clone()))
.collect();
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(reads, &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
resolve_phasing(&mut input, ®ion, &mut reader, &lenient_settings()).unwrap();
let gtp = GtAndPhase::from(&*input[1]);
assert!(gtp.phase.is_some(), "unresolved should be phased");
assert_eq!(
hap_of(>p),
Some(Haplotype::H1),
"should be H1: ALT1 of 2|1 anchor is on H1"
);
}
#[test]
fn test_resolve_1_2_anchor_h0_evidence_correct() {
use rust_htslib::bam::record::Cigar;
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let anchor = make_phased_het_multi(&w, 100, &[b"A", b"T", b"C"], (1, 2), 100);
let unresolved = make_unphased_het(&w, 110, b"G", b"A");
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(anchor), Arc::new(unresolved)];
let mut seq = vec![b'T'];
seq.extend(b"AAAAAAAAA"); seq.push(b'A'); let cigar = CigarString(vec![Cigar::Match(seq.len() as u32)]);
let reads: Vec<bam::Record> = (0..3)
.map(|_| make_bam_record(100, &seq, cigar.clone()))
.collect();
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(reads, &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
resolve_phasing(&mut input, ®ion, &mut reader, &lenient_settings()).unwrap();
let gtp = GtAndPhase::from(&*input[1]);
assert!(gtp.phase.is_some(), "unresolved should be phased");
assert_eq!(
hap_of(>p),
Some(Haplotype::H0),
"should be H0: ALT1 of 1|2 anchor is on H0"
);
}
#[test]
fn test_resolve_missing_allele_not_modified() {
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let mut rec_missing = w.empty_record();
let rid = rec_missing.header().name2rid(b"chr1").unwrap();
rec_missing.set_rid(Some(rid));
rec_missing.set_pos(100);
rec_missing.set_alleles(&[b"A", b"T"]).unwrap();
rec_missing
.push_genotypes(&[GenotypeAllele::Unphased(1), GenotypeAllele::UnphasedMissing])
.unwrap();
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(rec_missing)];
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(vec![], &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 30);
resolve_phasing(&mut input, ®ion, &mut reader, &lenient_settings()).unwrap();
let gtp = GtAndPhase::from(&*input[0]);
assert!(
!gtp.is_phased(),
"1/. record must not be phased by resolve_phasing"
);
assert!(
gtp.has_missing(),
"alleles must remain unchanged (still has missing)"
);
}
#[test]
fn test_resolve_1_2_unphased_ref_reads_excluded() {
use rust_htslib::bam::record::Cigar;
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let anchor = make_phased_het(&w, 100, b"A", b"T", Haplotype::H1, 100);
let mut rec_12 = w.empty_record();
let rid = rec_12.header().name2rid(b"chr1").unwrap();
rec_12.set_rid(Some(rid));
rec_12.set_pos(110);
rec_12.set_alleles(&[b"G", b"C", b"X"]).unwrap();
rec_12
.push_genotypes(&[GenotypeAllele::Unphased(1), GenotypeAllele::Unphased(2)])
.unwrap();
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(anchor), Arc::new(rec_12)];
let mut sig_seq = vec![b'T'];
sig_seq.extend(b"AAAAAAAAA"); sig_seq.push(b'C'); let cigar = CigarString(vec![Cigar::Match(sig_seq.len() as u32)]);
let mut reads: Vec<bam::Record> = (0..3)
.map(|_| make_bam_record(100, &sig_seq, cigar.clone()))
.collect();
let mut noise_seq = vec![b'T'];
noise_seq.extend(b"AAAAAAAAA"); noise_seq.push(b'G'); let noise_cigar = CigarString(vec![Cigar::Match(noise_seq.len() as u32)]);
reads.extend((0..3).map(|_| make_bam_record(100, &noise_seq, noise_cigar.clone())));
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(reads, &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
resolve_phasing(&mut input, ®ion, &mut reader, &lenient_settings()).unwrap();
let gtp = GtAndPhase::from(&*input[1]);
assert!(gtp.phase.is_some(), "1/2 unresolved should be phased");
assert_eq!(
gtp.alleles[1],
Some(1),
"ALT1(C) of 1/2 should be at phased GT position (alleles[1]=1), meaning it is on H1"
);
}
fn make_unphased_het_multi(
writer: &Writer,
pos: i64,
alleles: &[&[u8]],
gt: (u32, u32),
) -> bcf::Record {
let mut rec = writer.empty_record();
let rid = rec.header().name2rid(b"chr1").unwrap();
rec.set_rid(Some(rid));
rec.set_pos(pos);
rec.set_alleles(alleles).unwrap();
rec.push_genotypes(&[
GenotypeAllele::Unphased(gt.0 as i32),
GenotypeAllele::Unphased(gt.1 as i32),
])
.unwrap();
rec
}
#[test]
fn test_index_snv_picks_each_allele() {
use rust_htslib::bam::record::Cigar;
let alleles: &[&[u8]] = &[b"A", b"T", b"C"];
let cigar = CigarString(vec![Cigar::Match(5)]);
let alt1 = make_bam_record(100, b"AATAA", cigar.clone());
let alt2 = make_bam_record(100, b"AACAA", cigar.clone());
let refr = make_bam_record(100, b"AAAAA", cigar.clone());
let other = make_bam_record(100, b"AAGAA", cigar);
assert_eq!(read_allele_index_at_pos(&alt1, 102, alleles, 0), Some(1));
assert_eq!(read_allele_index_at_pos(&alt2, 102, alleles, 0), Some(2));
assert_eq!(read_allele_index_at_pos(&refr, 102, alleles, 0), Some(0));
assert_eq!(read_allele_index_at_pos(&other, 102, alleles, 0), None);
}
#[test]
fn test_index_alt2_not_mistaken_for_alt1() {
use rust_htslib::bam::record::Cigar;
let alleles: &[&[u8]] = &[b"A", b"T", b"C"];
let rec = make_bam_record(100, b"AACAA", CigarString(vec![Cigar::Match(5)]));
assert_eq!(read_allele_index_at_pos(&rec, 102, alleles, 0), Some(2));
}
#[test]
fn test_index_insertion_discrimination() {
use rust_htslib::bam::record::Cigar;
let alleles: &[&[u8]] = &[b"A", b"AT", b"AG"];
let cigar = CigarString(vec![Cigar::Match(1), Cigar::Ins(1), Cigar::Match(1)]);
let ins_t = make_bam_record(100, b"ATB", cigar.clone());
let ins_g = make_bam_record(100, b"AGB", cigar);
assert_eq!(read_allele_index_at_pos(&ins_t, 100, alleles, 0), Some(1));
assert_eq!(read_allele_index_at_pos(&ins_g, 100, alleles, 0), Some(2));
}
#[test]
fn test_index_deletion_length_discrimination() {
use rust_htslib::bam::record::Cigar;
let alleles: &[&[u8]] = &[b"ATG", b"A"];
let del2 = make_bam_record(
100,
b"AC",
CigarString(vec![Cigar::Match(1), Cigar::Del(2), Cigar::Match(1)]),
);
let del1 = make_bam_record(
100,
b"AG",
CigarString(vec![Cigar::Match(1), Cigar::Del(1), Cigar::Match(1)]),
);
assert_eq!(read_allele_index_at_pos(&del2, 100, alleles, 0), Some(1));
assert_eq!(
read_allele_index_at_pos(&del1, 100, alleles, 0),
None,
"a different-length deletion is not this ALT"
);
}
#[test]
fn test_resolve_1_2_anchor_alt2_evidence() {
use rust_htslib::bam::record::Cigar;
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let anchor = make_phased_het_multi(&w, 100, &[b"A", b"T", b"C"], (1, 2), 100);
let unresolved = make_unphased_het(&w, 110, b"G", b"A");
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(anchor), Arc::new(unresolved)];
let mut seq = vec![b'C']; seq.extend(b"AAAAAAAAA");
seq.push(b'A'); let cigar = CigarString(vec![Cigar::Match(seq.len() as u32)]);
let reads: Vec<bam::Record> = (0..3)
.map(|_| make_bam_record(100, &seq, cigar.clone()))
.collect();
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(reads, &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
resolve_phasing(&mut input, ®ion, &mut reader, &lenient_settings()).unwrap();
let gtp = GtAndPhase::from(&*input[1]);
assert!(gtp.phase.is_some(), "unresolved should be phased");
assert_eq!(
hap_of(>p),
Some(Haplotype::H1),
"ALT2 of 1|2 anchor is on H1 → unresolved ALT should be H1"
);
}
#[test]
fn test_resolve_native_1_2_unresolved_by_alt2() {
use rust_htslib::bam::record::Cigar;
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let anchor = make_phased_het(&w, 100, b"A", b"T", Haplotype::H1, 100);
let unresolved = make_unphased_het_multi(&w, 110, &[b"G", b"C", b"A"], (1, 2));
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(anchor), Arc::new(unresolved)];
let mut seq = vec![b'T']; seq.extend(b"AAAAAAAAA");
seq.push(b'A'); let cigar = CigarString(vec![Cigar::Match(seq.len() as u32)]);
let reads: Vec<bam::Record> = (0..3)
.map(|_| make_bam_record(100, &seq, cigar.clone()))
.collect();
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(reads, &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
resolve_phasing(&mut input, ®ion, &mut reader, &lenient_settings()).unwrap();
let gtp = GtAndPhase::from(&*input[1]);
assert!(gtp.phase.is_some(), "1/2 unresolved should be phased");
assert_eq!(
gtp.alleles[1],
Some(2),
"ALT2(A) co-occurs with anchor H1 → ALT2 at phased position (alleles[1]=2)"
);
}
#[test]
fn test_resolve_no_anchor_pair_of_1_2() {
use rust_htslib::bam::record::Cigar;
let h = make_vcf_header();
let w = Writer::from_stdout(&h, true, bcf::Format::Vcf).unwrap();
let r1 = make_unphased_het_multi(&w, 100, &[b"A", b"T", b"C"], (1, 2));
let r2 = make_unphased_het_multi(&w, 110, &[b"G", b"C", b"A"], (1, 2));
let mut input: Vec<Arc<bcf::Record>> = vec![Arc::new(r1), Arc::new(r2)];
let mut seq = vec![b'T']; seq.extend(b"AAAAAAAAA");
seq.push(b'C'); let cigar = CigarString(vec![Cigar::Match(seq.len() as u32)]);
let reads: Vec<bam::Record> = (0..3)
.map(|_| make_bam_record(100, &seq, cigar.clone()))
.collect();
let dir = tempfile::tempdir().unwrap();
let bam_path = dir.path().join("test.bam");
write_bam_and_index(reads, &bam_path);
let mut reader = make_indexed_reader(&bam_path);
let region = make_region(90, 50);
resolve_phasing(&mut input, ®ion, &mut reader, &lenient_settings()).unwrap();
assert!(
GtAndPhase::from(&*input[0]).phase.is_some(),
"r1 should be resolved"
);
assert!(
GtAndPhase::from(&*input[1]).phase.is_some(),
"r2 should be resolved"
);
}
}