rsvart 0.1.5

A small library for representing genomic variants and regions.
Documentation
use super::Contig;

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct GenomeBuildIdentifier {
    major_assembly: String,
    patch: String,
}

impl GenomeBuildIdentifier {
    pub fn new(major_assembly: String, patch: String) -> Self {
        GenomeBuildIdentifier {
            major_assembly,
            patch,
        }
    }

    pub fn major_assembly(&self) -> &str {
        &self.major_assembly
    }
    pub fn patch(&self) -> &str {
        &self.patch
    }
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GenomeBuild<C> {
    id: GenomeBuildIdentifier,
    contigs: Vec<Contig<C>>,
}

impl<C> GenomeBuild<C> {
    pub fn new(id: GenomeBuildIdentifier, mut contigs: Vec<Contig<C>>) -> Self {
        contigs.sort_by(|l, r| l.name().cmp(r.name()));
        GenomeBuild { id, contigs }
    }

    pub fn id(&self) -> &GenomeBuildIdentifier {
        &self.id
    }

    pub fn contigs(&self) -> &[Contig<C>] {
        &self.contigs
    }

    pub fn contig_from_genbank(&self, name: &str) -> Option<&Contig<C>> {
        self.contigs
            .iter()
            .find(|&c| c.name().eq(name) || c.gen_bank_accession().eq(name))
    }

    pub fn contig_from_refseq(&self, name: &str) -> Option<&Contig<C>> {
        self.contigs
            .iter()
            .find(|&c| c.name().eq(name) || c.ref_seq_accession().eq(name))
    }

    pub fn contig_from_ucsc(&self, name: &str) -> Option<&Contig<C>> {
        self.contigs
            .iter()
            .find(|&c| c.name().eq(name) || c.ucsc_name().eq(name))
    }
}

#[cfg(test)]
mod test {
    use super::GenomeBuild;
    use super::GenomeBuildIdentifier;
    use crate::genomic::{AssignedMoleculeType, Contig, SequenceRole};
    use rstest::rstest;

    #[rstest]
    fn test_genome_build_identifier() {
        let id = GenomeBuildIdentifier::new("GRCh38".to_string(), "p13".to_string());
        assert_eq!(id.major_assembly(), "GRCh38");
        assert_eq!(id.patch(), "p13");
    }

    #[rstest]
    fn test_genome_build() {
        let id = GenomeBuildIdentifier::new("GRCh38".to_string(), "p13".to_string());
        let contigs: Vec<Contig<u8>> = get_few_contigs();
        let build = GenomeBuild::new(id, contigs);
        assert_eq!(build.id().major_assembly(), "GRCh38");
        assert_eq!(build.id().patch(), "p13");
        assert_eq!(build.contigs().len(), 3);
        assert_eq!(
            *build.contig_from_genbank("CM000664.1").unwrap(),
            *&build.contigs()[1]
        );
        assert_eq!(
            *build.contig_from_refseq("NC_000001.10").unwrap(),
            *&build.contigs()[0]
        );
        assert_eq!(
            *build.contig_from_ucsc("chr3").unwrap(),
            *&build.contigs()[2]
        );
    }

    fn get_few_contigs() -> Vec<Contig<u8>> {
        vec![
            Contig::new(
                "1".to_string(),
                SequenceRole::AssembledMolecule,
                "1".to_string(),
                AssignedMoleculeType::Chromosome,
                60,
                "CM000663.1".to_string(),
                "NC_000001.10".to_string(),
                "chr1".to_string(),
            )
            .unwrap(),
            Contig::new(
                "2".to_string(),
                SequenceRole::AssembledMolecule,
                "2".to_string(),
                AssignedMoleculeType::Chromosome,
                60,
                "CM000664.1".to_string(),
                "NC_000002.10".to_string(),
                "chr2".to_string(),
            )
            .unwrap(),
            Contig::new(
                "3".to_string(),
                SequenceRole::AssembledMolecule,
                "3".to_string(),
                AssignedMoleculeType::Chromosome,
                60,
                "CM000665.1".to_string(),
                "NC_000003.10".to_string(),
                "chr3".to_string(),
            )
            .unwrap(),
        ]
    }
}