// atglib 0.2.0
//
// A library to handle transcripts for genomics and transcriptomics
// Documentation
use std::collections::HashMap;

use serde::{Deserialize, Serialize};

use crate::models::Transcript;

/// A convenience wrapper to handle large amounts of [`Transcript`]s
///
/// It allows fast lookup operations by gene or transcript name.
///
/// # Examples
///
/// ```rust
/// use atglib::models::{TranscriptBuilder, Transcripts};
///
/// let mut transcripts = Transcripts::new();
/// assert_eq!(transcripts.len(), 0);
///
/// transcripts.push(TranscriptBuilder::new()
///     .name("NM_001203247.2")
///     .chrom("chr7")
///     .gene("EZH2")
///     .strand(atglib::models::Strand::Minus)
///     .build()
///     .unwrap()
/// );
/// assert_eq!(transcripts.len(), 1);
///
/// assert_eq!(transcripts.by_name("NM_001203247.2").len(), 1);
/// assert_eq!(transcripts.by_gene("EZH2").len(), 1);
///
/// assert!(transcripts.by_name("Foo").is_empty());
/// assert!(transcripts.by_gene("Bar").is_empty());
/// ```
#[derive(Serialize, Deserialize)]
pub struct Transcripts {
    /// All stored transcripts; `push` appends, so this is insertion order
    list: Vec<Transcript>,
    /// Transcript name -> positions in `list` of every transcript with that name
    name: HashMap<String, Vec<usize>>,
    /// Gene symbol -> positions in `list` of every transcript of that gene
    gene: HashMap<String, Vec<usize>>,
}

impl Transcripts {
    /// Creates an empty collection
    ///
    /// # Examples
    ///
    /// ```rust
    /// use atglib::models::Transcripts;
    ///
    /// let transcripts = Transcripts::new();
    /// assert!(transcripts.is_empty());
    /// ```
    pub fn new() -> Self {
        Self {
            list: Vec::new(),
            name: HashMap::new(),
            gene: HashMap::new(),
        }
    }

    /// Creates an empty collection with pre-allocated storage
    ///
    /// `capacity` is used for the transcript list as well as for both
    /// lookup tables (by name and by gene).
    ///
    /// # Examples
    ///
    /// ```rust
    /// use atglib::models::Transcripts;
    ///
    /// let transcripts = Transcripts::with_capacity(100);
    /// assert!(transcripts.is_empty());
    /// ```
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            list: Vec::with_capacity(capacity),
            name: HashMap::with_capacity(capacity),
            gene: HashMap::with_capacity(capacity),
        }
    }

    /// Retrieve all [`Transcript`]s by their name / transcript-id
    ///
    /// Most transcripts exist only once, but some transcripts
    /// (e.g. NM_001370371.1) are on both X- and Y-chromosome.
    ///
    /// The result is in insertion order and is empty if the name is unknown.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use atglib::models::{TranscriptBuilder, Transcripts};
    /// # let mut transcripts = Transcripts::new();
    /// # transcripts.push(TranscriptBuilder::new()
    /// #     .name("NM_001203247.2")
    /// #     .chrom("chr7")
    /// #     .gene("EZH2")
    /// #     .strand(atglib::models::Strand::Minus)
    /// #     .build()
    /// #     .unwrap()
    /// # );
    /// assert_eq!(transcripts.by_name("NM_001203247.2").len(), 1);
    /// assert!(transcripts.by_name("invalid_name").is_empty());
    /// ```
    pub fn by_name(&self, name: &str) -> Vec<&Transcript> {
        Self::collect_indexed(&self.list, &self.name, name)
    }

    /// Retrieve all [`Transcript`]s of a gene
    ///
    /// The result is in insertion order and is empty if the gene is unknown.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use atglib::models::{TranscriptBuilder, Transcripts};
    /// # let mut transcripts = Transcripts::new();
    /// # transcripts.push(TranscriptBuilder::new()
    /// #     .name("NM_001203247.2")
    /// #     .chrom("chr7")
    /// #     .gene("EZH2")
    /// #     .strand(atglib::models::Strand::Minus)
    /// #     .build()
    /// #     .unwrap()
    /// # );
    /// assert_eq!(transcripts.by_gene("EZH2").len(), 1);
    /// assert!(transcripts.by_gene("Invalid-name").is_empty());
    /// ```
    pub fn by_gene(&self, gene: &str) -> Vec<&Transcript> {
        Self::collect_indexed(&self.list, &self.gene, gene)
    }

    /// Add another [`Transcript`]
    ///
    /// The transcript is appended to the list and registered in both
    /// the name and the gene lookup table.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use atglib::models::{TranscriptBuilder, Transcripts};
    ///
    /// let mut transcripts = Transcripts::new();
    ///
    /// transcripts.push(TranscriptBuilder::new()
    ///     .name("NM_001203247.2")
    ///     .chrom("chr7")
    ///     .gene("EZH2")
    ///     .strand(atglib::models::Strand::Minus)
    ///     .build()
    ///     .unwrap()
    /// );
    ///
    /// transcripts.push(TranscriptBuilder::new()
    ///     .name("NM_001203247.3")
    ///     .chrom("chr7")
    ///     .gene("EZH2")
    ///     .strand(atglib::models::Strand::Minus)
    ///     .build()
    ///     .unwrap()
    /// );
    /// assert_eq!(transcripts.len(), 2);
    ///
    /// assert_eq!(transcripts.by_name("NM_001203247.2").len(), 1);
    /// assert_eq!(transcripts.by_name("NM_001203247.3").len(), 1);
    /// assert_eq!(transcripts.by_gene("EZH2").len(), 2);
    /// ```
    pub fn push(&mut self, record: Transcript) {
        // The record is appended below, so its position equals the current length.
        let idx = self.list.len();

        Self::register(&mut self.name, record.name(), idx);
        Self::register(&mut self.gene, record.gene(), idx);

        self.list.push(record);
    }

    /// Returns the number of [`Transcript`]s in the object
    pub fn len(&self) -> usize {
        self.list.len()
    }

    /// Returns true if the object contains no transcripts.
    pub fn is_empty(&self) -> bool {
        self.list.is_empty()
    }

    /// Returns a reference to the underlying vector of [`Transcript`]s
    pub fn as_vec(&self) -> &Vec<Transcript> {
        &self.list
    }

    /// Consumes the object and returns the vector of [`Transcript`]s
    pub fn to_vec(self) -> Vec<Transcript> {
        self.list
    }

    /// Returns a vector with the gene symbols of all transcripts
    ///
    /// Every gene symbol is listed once. The symbols are the keys of a
    /// `HashMap`, so their order is arbitrary.
    pub fn genes(&self) -> Vec<&str> {
        // `String::as_str` turns the `&String` keys into `&str`
        self.gene.keys().map(String::as_str).collect()
    }

    /// Resolves the positions stored under `key` in `index` into references
    /// to the transcripts in `list`. Returns an empty vector for unknown keys.
    fn collect_indexed<'a>(
        list: &'a [Transcript],
        index: &HashMap<String, Vec<usize>>,
        key: &str,
    ) -> Vec<&'a Transcript> {
        index
            .get(key)
            // Positions are only ever recorded by `push`, together with the
            // matching list entry, so indexing cannot go out of bounds for
            // objects that were built through `push`.
            .map(|ids| ids.iter().map(|&idx| &list[idx]).collect())
            .unwrap_or_default()
    }

    /// Records that the transcript at position `idx` belongs to `key`.
    fn register(index: &mut HashMap<String, Vec<usize>>, key: &str, idx: usize) {
        // Look up by `&str` first so that an owned `String` key is only
        // allocated the first time a key is seen.
        match index.get_mut(key) {
            Some(ids) => ids.push(idx),
            None => {
                index.insert(key.to_string(), vec![idx]);
            }
        }
    }
}

impl Default for Transcripts {
    fn default() -> Self {
        Self::new()
    }
}

impl IntoIterator for Transcripts {
    type Item = Transcript;
    type IntoIter = std::vec::IntoIter<Self::Item>;

    fn into_iter(self) -> Self::IntoIter {
        self.list.into_iter()
    }
}