twitcher 0.5.0

Find template switch mutations in genomic data
use std::borrow::Borrow;

use rust_htslib::bcf::Record;

pub use crate::common::cluster_settings::ClusteringSettings;
use crate::common::cluster_settings::variant_complexity;

impl ClusteringSettings {
    pub fn belongs<R: Borrow<Record>>(
        &self,
        last: Option<R>,
        candidate: R,
    ) -> anyhow::Result<bool> {
        let Some(last) = last else {
            return Ok(true);
        };
        Ok(candidate.borrow().pos().saturating_sub(last.borrow().end())
            <= self.max_gap.try_into()?)
    }

    pub fn is_cluster<R, F: Fn(&R) -> &Record, G: Fn(&R) -> Option<u32>>(
        &self,
        cluster: &[R],
        get_record: F,
        get_allele: G,
    ) -> bool {
        if cluster.is_empty() {
            return false;
        }
        let first = get_record(cluster.first().unwrap().borrow());
        let last = get_record(cluster.last().unwrap().borrow());
        let span = first.pos().abs_diff(last.end()) as f64 + 1.0;
        let complexity: f64 = cluster
            .iter()
            .map(|r| (get_record(r), get_allele(r)))
            .flat_map(|(r, a)| record_complexity(r, a))
            .sum();
        self.is_valid_cluster(complexity, span, cluster.len())
    }
}

fn record_complexity(record: &Record, allele: Option<u32>) -> anyhow::Result<f64> {
    let alls = record.alleles();
    if alls.len() < 2 {
        anyhow::bail!("Record should have at least two alleles");
    }
    if let Some(a) = allele {
        Ok(variant_complexity(alls[0].len(), alls[a as usize].len()))
    } else {
        Ok(alls[1..]
            .iter()
            .map(|a| variant_complexity(alls[0].len(), a.len()))
            .reduce(|acc, e| if e > acc { e } else { acc })
            .unwrap())
    }
}