twitcher 0.1.8

Find template switch mutations in genomic data
use rust_htslib::bcf::Record;

pub use crate::common::cluster_settings::ClusteringSettings;
use crate::common::cluster_settings::variant_complexity;

impl ClusteringSettings {
    pub fn belongs(&self, cluster: &[Record], candidate: &Record) -> anyhow::Result<bool> {
        let Some(last) = cluster.last() else {
            return Ok(true);
        };
        if candidate.pos().saturating_sub(last.end()) <= self.max_gap.try_into()? {
            let gt_last = last.genotypes()?;
            let gt_candidate = candidate.genotypes()?;
            let ps_last = last.format(b"PS").integer();
            let ps_candidate = candidate.format(b"PS").integer();
            let mut pass = true;
            for sample in 0..candidate.sample_count() {
                let l = gt_last.get(sample as usize);
                let c = gt_candidate.get(sample as usize);
                if l != c {
                    // TODO more elaborate GT compatibility check
                    pass = false;
                    break;
                }
                if ps_last.as_ref().is_ok_and(|last| {
                    ps_candidate
                        .as_ref()
                        .is_ok_and(|candidate| **last != **candidate)
                }) {
                    // Different phasesets
                    pass = false;
                    break;
                }
            }
            Ok(pass)
        } else {
            Ok(false)
        }
    }

    pub fn is_cluster(&self, cluster: &[Record]) -> bool {
        if cluster.is_empty() {
            return false;
        }
        let first = cluster.first().unwrap();
        let last = cluster.last().unwrap();
        let span = first.pos().abs_diff(last.end()) as f64 + 1.0;
        let complexity: f64 = cluster.iter().flat_map(record_complexity).sum();
        self.is_valid_cluster(complexity, span, cluster.len())
    }
}

fn record_complexity(record: &Record) -> anyhow::Result<f64> {
    let alls = record.alleles();
    if alls.len() != 2 {
        anyhow::bail!("Record should have two alleles");
    }
    Ok(variant_complexity(alls[0].len(), alls[1].len()))
}